parent
169040b4a2
commit
a85250cf16
@ -0,0 +1,13 @@
|
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
@ -0,0 +1,34 @@
|
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
# Remote audio fixtures used by the backend tests: the first is read by the
# mono-channel tests, the second by the multi-channel tests.
mono_channel_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
multi_channels_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav'


class BackendTest(unittest.TestCase):
    """Base test case that makes the wav fixtures available locally.

    After ``setUp`` runs, ``self.files`` holds the local file names (relative
    to the current working directory) of the mono and multi-channel fixtures,
    in that order.
    """

    def setUp(self):
        self.initWavInput()

    def initWavInput(self):
        """Populate ``self.files``, downloading any fixture not yet present."""
        self.files = []
        for url in [mono_channel_wav, multi_channels_wav]:
            local_path = os.path.basename(url)
            if not os.path.isfile(local_path):
                self._download(url, local_path)
            self.files.append(local_path)

    @staticmethod
    def _download(url, local_path):
        """Fetch ``url`` into ``local_path`` without exposing partial files.

        The data is first written to a temporary sibling file and only moved
        to ``local_path`` once the transfer has finished. An interrupted
        download therefore never leaves a truncated file that a later run
        would mistake for a valid, already-downloaded fixture.
        """
        # The pid suffix keeps concurrently running test processes from
        # writing into the same temporary file.
        partial_path = '{}.part.{}'.format(local_path, os.getpid())
        try:
            urllib.request.urlretrieve(url, partial_path)
            os.replace(partial_path, local_path)
        finally:
            # Only still present when the download or the rename failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    def initParmas(self):
        """Hook for subclasses to define their test parameters.

        The method name (including its spelling) is kept as-is so that
        existing overrides keep working.
        """
        raise NotImplementedError
|
@ -0,0 +1,13 @@
|
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
@ -0,0 +1,73 @@
|
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
import filecmp
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
|
import paddleaudio
|
||||||
|
from ..base import BackendTest
|
||||||
|
|
||||||
|
|
||||||
|
class TestIO(BackendTest):
    """Cross-check ``paddleaudio.load`` / ``paddleaudio.save`` with soundfile.

    ``self.files[0]`` is read as the mono fixture and ``self.files[1]`` as the
    multi-channel fixture; both are expected to be provided by the
    ``BackendTest`` base class.
    """

    def test_load_mono_channel(self):
        sf_data, sf_sr = sf.read(self.files[0])
        pa_data, pa_sr = paddleaudio.load(
            self.files[0], normal=False, dtype='float64')

        self.assertEqual(sf_data.dtype, pa_data.dtype)
        self.assertEqual(sf_sr, pa_sr)
        np.testing.assert_array_almost_equal(sf_data, pa_data)

    def test_load_multi_channels(self):
        sf_data, sf_sr = sf.read(self.files[1])
        sf_data = sf_data.T  # Channel dim first
        pa_data, pa_sr = paddleaudio.load(
            self.files[1], mono=False, normal=False, dtype='float64')

        self.assertEqual(sf_data.dtype, pa_data.dtype)
        self.assertEqual(sf_sr, pa_sr)
        np.testing.assert_array_almost_equal(sf_data, pa_data)

    def _assert_save_matches_soundfile(self, waveform, sr):
        """Write ``waveform`` with both libraries and require identical files.

        Args:
            waveform: int16 samples, passed unchanged to ``sf.write`` and
                ``paddleaudio.save``.
            sr: sample rate passed to both writers.
        """
        sf_tmp_file = 'sf_tmp.wav'
        pa_tmp_file = 'pa_tmp.wav'
        try:
            sf.write(sf_tmp_file, waveform, sr)
            paddleaudio.save(waveform, sr, pa_tmp_file)
            # shallow=False forces a byte-wise comparison. The default shallow
            # mode declares files equal when size and mtime match, which can
            # happen for two same-sized files written back to back.
            self.assertTrue(
                filecmp.cmp(sf_tmp_file, pa_tmp_file, shallow=False))
        finally:
            # Clean up even when a write or the assertion fails, so a failing
            # run does not leave stale files in the working directory.
            for tmp_file in (sf_tmp_file, pa_tmp_file):
                if os.path.exists(tmp_file):
                    os.remove(tmp_file)

    def test_save_mono_channel(self):
        waveform, sr = np.random.randint(
            low=-32768, high=32768, size=(48000), dtype=np.int16), 16000
        self._assert_save_matches_soundfile(waveform, sr)

    def test_save_multi_channels(self):
        waveform, sr = np.random.randint(
            low=-32768, high=32768, size=(2, 48000), dtype=np.int16), 16000
        # Both writers receive the transposed array, as in the original test.
        self._assert_save_matches_soundfile(waveform.T, sr)


if __name__ == '__main__':
    unittest.main()
|
@ -0,0 +1,227 @@
|
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import librosa
|
||||||
|
import numpy as np
|
||||||
|
import paddle
|
||||||
|
|
||||||
|
import paddleaudio
|
||||||
|
from .base import FeatTest
|
||||||
|
from paddleaudio.functional.window import get_window
|
||||||
|
|
||||||
|
|
||||||
|
class TestLibrosa(FeatTest):
    """Compare paddle / paddleaudio feature outputs with librosa references.

    ``self.waveform`` and ``self.sr`` are read but not set in this class;
    they are presumably provided by ``FeatTest`` (confirm against the base).
    """

    def initParmas(self):
        # Analysis settings shared by every test below.
        self.window_str = 'hann'
        self.pad_mode = 'reflect'
        self.n_fft = 512
        self.hop_length = 128
        self.n_mels = 40
        self.fmin = 0.0

    def _flatten_waveform(self):
        """Squeeze axis 0 of a 2-D ``self.waveform`` so librosa gets 1-D."""
        if len(self.waveform.shape) == 2:  # (C, T)
            self.waveform = self.waveform.squeeze(0)

    def _librosa_melspectrogram(self):
        """Return librosa's mel spectrogram of ``self.waveform``."""
        return librosa.feature.melspectrogram(
            y=self.waveform,
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)

    def _compliance_melspectrogram(self, **extra):
        """Return ``paddleaudio.compliance.librosa.melspectrogram`` output.

        ``extra`` is forwarded unchanged (e.g. ``to_db=False``).
        """
        return paddleaudio.compliance.librosa.melspectrogram(
            x=self.waveform,
            sr=self.sr,
            window_size=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin,
            **extra)

    def _layer_feature(self, layer_cls):
        """Run a ``paddleaudio.features`` layer and return a numpy array."""
        batch = paddle.to_tensor(
            self.waveform, dtype=paddle.float64).unsqueeze(0)  # Add batch dim.
        extractor = layer_cls(
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            f_min=self.fmin,
            dtype=batch.dtype)
        return extractor(batch).squeeze(0).numpy()

    def test_stft(self):
        self._flatten_waveform()  # 1D input for librosa.core.stft

        # Keyword arguments that both implementations accept under one name.
        shared_args = dict(
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            center=True,
            pad_mode=self.pad_mode)

        expected = librosa.core.stft(
            y=self.waveform,
            window=self.window_str,
            dtype=None,
            **shared_args)

        batch = paddle.to_tensor(self.waveform).unsqueeze(0)
        win = get_window(self.window_str, self.n_fft, dtype=batch.dtype)
        actual = paddle.signal.stft(
            x=batch,
            window=win,
            normalized=False,
            onesided=True,
            **shared_args).squeeze(0)

        np.testing.assert_array_almost_equal(expected, actual, decimal=5)

    def test_istft(self):
        self._flatten_waveform()  # 1D input for librosa.core.stft

        # Get stft result from librosa.
        spectrum = librosa.core.stft(
            y=self.waveform,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=self.window_str,
            center=True,
            pad_mode=self.pad_mode)

        # Keyword arguments that both inverse transforms share.
        inverse_args = dict(
            hop_length=self.hop_length,
            win_length=None,
            center=True,
            length=None)

        expected = librosa.core.istft(
            stft_matrix=spectrum,
            window=self.window_str,
            dtype=None,
            **inverse_args)

        batch = paddle.to_tensor(spectrum).unsqueeze(0)
        win = get_window(
            self.window_str,
            self.n_fft,
            dtype=paddle.to_tensor(self.waveform).dtype)
        actual = paddle.signal.istft(
            x=batch,
            n_fft=self.n_fft,
            window=win,
            normalized=False,
            onesided=True,
            return_complex=False,
            **inverse_args).squeeze(0)

        np.testing.assert_array_almost_equal(expected, actual, decimal=5)

    def test_mel(self):
        # librosa and the compliance module take identical keyword arguments.
        reference_args = dict(
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            fmin=self.fmin,
            fmax=None,
            htk=False,
            norm='slaney',
            dtype=self.waveform.dtype)

        expected = librosa.filters.mel(**reference_args)
        from_compliance = paddleaudio.compliance.librosa.compute_fbank_matrix(
            **reference_args)

        tensor = paddle.to_tensor(self.waveform)
        from_functional = paddleaudio.functional.compute_fbank_matrix(
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            f_min=self.fmin,
            f_max=None,
            htk=False,
            norm='slaney',
            dtype=tensor.dtype)

        np.testing.assert_array_almost_equal(expected, from_compliance)
        np.testing.assert_array_almost_equal(expected, from_functional)

    def test_melspect(self):
        self._flatten_waveform()  # 1D input for librosa.feature.melspectrogram

        # librosa:
        expected = self._librosa_melspectrogram()

        # paddleaudio.compliance.librosa:
        from_compliance = self._compliance_melspectrogram(to_db=False)

        # paddleaudio.features.layer
        from_layer = self._layer_feature(paddleaudio.features.MelSpectrogram)

        np.testing.assert_array_almost_equal(
            expected, from_compliance, decimal=5)
        np.testing.assert_array_almost_equal(expected, from_layer, decimal=5)

    def test_log_melspect(self):
        self._flatten_waveform()  # 1D input for librosa.feature.melspectrogram

        # librosa:
        expected = librosa.power_to_db(
            self._librosa_melspectrogram(), top_db=None)

        # paddleaudio.compliance.librosa:
        from_compliance = self._compliance_melspectrogram()

        # paddleaudio.features.layer
        from_layer = self._layer_feature(
            paddleaudio.features.LogMelSpectrogram)

        np.testing.assert_array_almost_equal(
            expected, from_compliance, decimal=5)
        # The layer output is only checked to 4 decimals, as in the original.
        np.testing.assert_array_almost_equal(expected, from_layer, decimal=4)


if __name__ == '__main__':
    unittest.main()
|
Loading…
Reference in new issue