Merge pull request #1548 from KPatr1ck/audio
[Audio][Feature][Unittest][Benchmark]Add unittest and benchmark for feature apis.pull/1569/head
commit
f8f995a2a5
@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
@ -0,0 +1,34 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
import unittest
|
||||
import urllib.request
|
||||
|
||||
mono_channel_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
|
||||
multi_channels_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav'
|
||||
|
||||
|
||||
class BackendTest(unittest.TestCase):
    """Base fixture for audio-backend tests.

    Before every test the sample wav files are made available in the current
    working directory and their local names are stored in ``self.files``
    (mono-channel sample first, multi-channel sample second).
    """

    def setUp(self):
        self.initWavInput()

    def initWavInput(self):
        """Download any sample wav that is missing and record its local name."""
        self.files = []
        for remote in (mono_channel_wav, multi_channels_wav):
            local_name = os.path.basename(remote)
            already_cached = os.path.isfile(local_name)
            if not already_cached:
                urllib.request.urlretrieve(remote, local_name)
            self.files.append(local_name)

    def initParmas(self):
        """Parameter hook; the base class provides no implementation."""
        raise NotImplementedError
|
@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
@ -0,0 +1,73 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import filecmp
|
||||
import os
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
|
||||
import paddleaudio
|
||||
from ..base import BackendTest
|
||||
|
||||
|
||||
class TestIO(BackendTest):
    """Compare ``paddleaudio.load`` / ``paddleaudio.save`` with ``soundfile``."""

    @staticmethod
    def _remove_if_exists(path):
        """Delete ``path`` when it is an existing file."""
        if os.path.isfile(path):
            os.remove(path)

    def _assert_save_matches_soundfile(self, samples, sr):
        """Write ``samples`` with both libraries and require identical files."""
        sf_tmp_file = 'sf_tmp.wav'
        pa_tmp_file = 'pa_tmp.wav'
        # Register removal up front so the files are cleaned even when the
        # comparison below fails.
        for tmp_file in (sf_tmp_file, pa_tmp_file):
            self.addCleanup(self._remove_if_exists, tmp_file)

        sf.write(sf_tmp_file, samples, sr)
        paddleaudio.save(samples, sr, pa_tmp_file)

        # shallow=False forces a byte-by-byte comparison; the default may
        # report equality from matching os.stat() signatures alone.
        self.assertTrue(filecmp.cmp(sf_tmp_file, pa_tmp_file, shallow=False))

    def test_load_mono_channel(self):
        sf_data, sf_sr = sf.read(self.files[0])
        pa_data, pa_sr = paddleaudio.load(
            self.files[0], normal=False, dtype='float64')

        self.assertEqual(sf_data.dtype, pa_data.dtype)
        self.assertEqual(sf_sr, pa_sr)
        np.testing.assert_array_almost_equal(sf_data, pa_data)

    def test_load_multi_channels(self):
        sf_data, sf_sr = sf.read(self.files[1])
        sf_data = sf_data.T  # Channel dim first
        pa_data, pa_sr = paddleaudio.load(
            self.files[1], mono=False, normal=False, dtype='float64')

        self.assertEqual(sf_data.dtype, pa_data.dtype)
        self.assertEqual(sf_sr, pa_sr)
        np.testing.assert_array_almost_equal(sf_data, pa_data)

    def test_save_mono_channel(self):
        waveform, sr = np.random.randint(
            low=-32768, high=32768, size=(48000), dtype=np.int16), 16000
        self._assert_save_matches_soundfile(waveform, sr)

    def test_save_multi_channels(self):
        waveform, sr = np.random.randint(
            low=-32768, high=32768, size=(2, 48000), dtype=np.int16), 16000
        # Both writers receive the transposed (48000, 2) array.
        self._assert_save_matches_soundfile(waveform.T, sr)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -0,0 +1,39 @@
|
||||
# 1. Prepare
|
||||
First, install `pytest-benchmark` via pip.
|
||||
```sh
|
||||
pip install pytest-benchmark
|
||||
```
|
||||
|
||||
# 2. Run
|
||||
Run the benchmark script you want to profile with `pytest`, for example:
|
||||
```sh
|
||||
pytest melspectrogram.py
|
||||
```
|
||||
|
||||
Result:
|
||||
```sh
|
||||
========================================================================== test session starts ==========================================================================
|
||||
platform linux -- Python 3.7.7, pytest-7.0.1, pluggy-1.0.0
|
||||
benchmark: 3.4.1 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
|
||||
rootdir: /ssd3/chenxiaojie06/PaddleSpeech/DeepSpeech/paddleaudio
|
||||
plugins: typeguard-2.12.1, benchmark-3.4.1, anyio-3.5.0
|
||||
collected 4 items
|
||||
|
||||
melspectrogram.py .... [100%]
|
||||
|
||||
|
||||
-------------------------------------------------------------------------------------------------- benchmark: 4 tests -------------------------------------------------------------------------------------------------
|
||||
Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
|
||||
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
test_melspect_gpu_torchaudio 202.0765 (1.0) 360.6230 (1.0) 218.1168 (1.0) 16.3022 (1.0) 214.2871 (1.0) 21.8451 (1.0) 40;3 4,584.7001 (1.0) 286 1
|
||||
test_melspect_gpu 657.8509 (3.26) 908.0470 (2.52) 724.2545 (3.32) 106.5771 (6.54) 669.9096 (3.13) 113.4719 (5.19) 1;0 1,380.7300 (0.30) 5 1
|
||||
test_melspect_cpu_torchaudio 1,247.6053 (6.17) 2,892.5799 (8.02) 1,443.2853 (6.62) 345.3732 (21.19) 1,262.7263 (5.89) 221.6385 (10.15) 56;53 692.8637 (0.15) 399 1
|
||||
test_melspect_cpu 20,326.2549 (100.59) 20,607.8682 (57.15) 20,473.4125 (93.86) 63.8654 (3.92) 20,467.0429 (95.51) 68.4294 (3.13) 8;1 48.8438 (0.01) 29 1
|
||||
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
Legend:
|
||||
Outliers: 1 Standard Deviation from Mean; 1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile.
|
||||
OPS: Operations Per Second, computed as 1 / Mean
|
||||
========================================================================== 4 passed in 21.12s ===========================================================================
|
||||
|
||||
```
|
@ -0,0 +1,124 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
import urllib.request
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import paddle
|
||||
import torch
|
||||
import torchaudio
|
||||
|
||||
import paddleaudio
|
||||
|
||||
# Sample audio; downloaded into the working directory only when missing.
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
_wav_name = os.path.basename(wav_url)
if not os.path.isfile(_wav_name):
    urllib.request.urlretrieve(wav_url, _wav_name)

# The loaded samples are also wrapped as a paddle tensor and a torch tensor,
# each with an extra leading dimension of size 1.
waveform, sr = paddleaudio.load(os.path.abspath(_wav_name))
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)

# Feature conf
mel_conf = dict(sr=sr, n_fft=512, hop_length=128, n_mels=40)

mel_conf_torchaudio = dict(
    sample_rate=sr,
    n_fft=512,
    hop_length=128,
    n_mels=40,
    norm='slaney',
    mel_scale='slaney',
)
|
||||
|
||||
|
||||
def enable_cpu_device():
    """Select ``'cpu'`` as the paddle device."""
    paddle.set_device('cpu')
|
||||
|
||||
|
||||
def enable_gpu_device():
    """Select ``'gpu'`` as the paddle device."""
    paddle.set_device('gpu')
|
||||
|
||||
|
||||
# paddleaudio layer under benchmark, configured from the shared mel settings.
log_mel_extractor = paddleaudio.features.LogMelSpectrogram(
    f_min=0.0, top_db=80.0, dtype=waveform_tensor.dtype, **mel_conf)


def log_melspectrogram():
    """Apply ``log_mel_extractor`` to the sample and drop the leading dim."""
    batched = log_mel_extractor(waveform_tensor)
    return batched.squeeze(0)
|
||||
|
||||
|
||||
def test_log_melspect_cpu(benchmark):
    """Benchmark paddleaudio log-mel on CPU and compare it with librosa."""
    enable_cpu_device()
    feature_paddleaudio = benchmark(log_melspectrogram)

    # Pass the signal as ``y=``: librosa >= 0.10 accepts it by keyword only.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_paddleaudio, decimal=3)
|
||||
|
||||
|
||||
def test_log_melspect_gpu(benchmark):
    """Benchmark paddleaudio log-mel on GPU and compare it with librosa."""
    enable_gpu_device()
    feature_paddleaudio = benchmark(log_melspectrogram)

    # Pass the signal as ``y=``: librosa >= 0.10 accepts it by keyword only.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
    # Compared to 2 decimals here, versus 3 in the CPU variant.
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_paddleaudio, decimal=2)
|
||||
|
||||
|
||||
# torchaudio counterparts used as the benchmark baseline.
mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram(
    f_min=0.0, **mel_conf_torchaudio)
amplitude_to_DB = torchaudio.transforms.AmplitudeToDB('power', top_db=80.0)


def melspectrogram_torchaudio():
    """Apply ``mel_extractor_torchaudio`` to the sample and drop the leading dim."""
    batched = mel_extractor_torchaudio(waveform_tensor_torch)
    return batched.squeeze(0)
|
||||
|
||||
|
||||
def log_melspectrogram_torchaudio():
    """Run the torchaudio mel extractor, then ``amplitude_to_DB``, and drop the leading dim."""
    return amplitude_to_DB(
        mel_extractor_torchaudio(waveform_tensor_torch)).squeeze(0)
|
||||
|
||||
|
||||
def test_log_melspect_cpu_torchaudio(benchmark):
    """Benchmark torchaudio log-mel on CPU and compare it with librosa."""
    global waveform_tensor_torch, mel_extractor_torchaudio, amplitude_to_DB

    # Rebind the module-level objects so the benchmarked function uses CPU.
    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu')
    waveform_tensor_torch = waveform_tensor_torch.to('cpu')
    amplitude_to_DB = amplitude_to_DB.to('cpu')

    # Named after the library that produced it (was ``feature_paddleaudio``).
    feature_torchaudio = benchmark(log_melspectrogram_torchaudio)
    # Pass the signal as ``y=``: librosa >= 0.10 accepts it by keyword only.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio, decimal=3)
|
||||
|
||||
|
||||
def test_log_melspect_gpu_torchaudio(benchmark):
    """Benchmark torchaudio log-mel on CUDA and compare it with librosa."""
    global waveform_tensor_torch, mel_extractor_torchaudio, amplitude_to_DB

    # Rebind the module-level objects so the benchmarked function uses CUDA.
    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cuda')
    waveform_tensor_torch = waveform_tensor_torch.to('cuda')
    amplitude_to_DB = amplitude_to_DB.to('cuda')

    feature_torchaudio = benchmark(log_melspectrogram_torchaudio)
    # Pass the signal as ``y=``: librosa >= 0.10 accepts it by keyword only.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
    # The result is copied back to host memory before the numpy comparison.
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio.cpu(), decimal=2)
|
@ -0,0 +1,108 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
import urllib.request
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import paddle
|
||||
import torch
|
||||
import torchaudio
|
||||
|
||||
import paddleaudio
|
||||
|
||||
# Sample audio; downloaded into the working directory only when missing.
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
_wav_name = os.path.basename(wav_url)
if not os.path.isfile(_wav_name):
    urllib.request.urlretrieve(wav_url, _wav_name)

# The loaded samples are also wrapped as a paddle tensor and a torch tensor,
# each with an extra leading dimension of size 1.
waveform, sr = paddleaudio.load(os.path.abspath(_wav_name))
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)

# Feature conf
mel_conf = dict(sr=sr, n_fft=512, hop_length=128, n_mels=40)

mel_conf_torchaudio = dict(
    sample_rate=sr,
    n_fft=512,
    hop_length=128,
    n_mels=40,
    norm='slaney',
    mel_scale='slaney',
)
|
||||
|
||||
|
||||
def enable_cpu_device():
    """Select ``'cpu'`` as the paddle device."""
    paddle.set_device('cpu')
|
||||
|
||||
|
||||
def enable_gpu_device():
    """Select ``'gpu'`` as the paddle device."""
    paddle.set_device('gpu')
|
||||
|
||||
|
||||
# paddleaudio layer under benchmark, configured from the shared mel settings.
mel_extractor = paddleaudio.features.MelSpectrogram(
    f_min=0.0, dtype=waveform_tensor.dtype, **mel_conf)


def melspectrogram():
    """Apply ``mel_extractor`` to the sample and drop the leading dim."""
    batched = mel_extractor(waveform_tensor)
    return batched.squeeze(0)
|
||||
|
||||
|
||||
def test_melspect_cpu(benchmark):
    """Benchmark paddleaudio mel spectrogram on CPU and compare it with librosa."""
    enable_cpu_device()
    feature_paddleaudio = benchmark(melspectrogram)
    # Pass the signal as ``y=``: librosa >= 0.10 accepts it by keyword only.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_paddleaudio, decimal=3)
|
||||
|
||||
|
||||
def test_melspect_gpu(benchmark):
    """Benchmark paddleaudio mel spectrogram on GPU and compare it with librosa."""
    enable_gpu_device()
    feature_paddleaudio = benchmark(melspectrogram)
    # Pass the signal as ``y=``: librosa >= 0.10 accepts it by keyword only.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_paddleaudio, decimal=3)
|
||||
|
||||
|
||||
# torchaudio counterpart used as the benchmark baseline.
mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram(
    f_min=0.0, **mel_conf_torchaudio)


def melspectrogram_torchaudio():
    """Apply ``mel_extractor_torchaudio`` to the sample and drop the leading dim."""
    batched = mel_extractor_torchaudio(waveform_tensor_torch)
    return batched.squeeze(0)
|
||||
|
||||
|
||||
def test_melspect_cpu_torchaudio(benchmark):
    """Benchmark torchaudio mel spectrogram on CPU and compare it with librosa."""
    global waveform_tensor_torch, mel_extractor_torchaudio
    # Rebind the module-level objects so the benchmarked function uses CPU.
    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu')
    waveform_tensor_torch = waveform_tensor_torch.to('cpu')

    # Named after the library that produced it (was ``feature_paddleaudio``).
    feature_torchaudio = benchmark(melspectrogram_torchaudio)
    # Pass the signal as ``y=``: librosa >= 0.10 accepts it by keyword only.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio, decimal=3)
|
||||
|
||||
|
||||
def test_melspect_gpu_torchaudio(benchmark):
    """Benchmark torchaudio mel spectrogram on CUDA and compare it with librosa."""
    global waveform_tensor_torch, mel_extractor_torchaudio
    # Rebind the module-level objects so the benchmarked function uses CUDA.
    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cuda')
    waveform_tensor_torch = waveform_tensor_torch.to('cuda')

    feature_torchaudio = benchmark(melspectrogram_torchaudio)
    # Pass the signal as ``y=``: librosa >= 0.10 accepts it by keyword only.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    # The result is copied back to host memory before the numpy comparison.
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio.cpu(), decimal=3)
|
@ -0,0 +1,122 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
import urllib.request
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import paddle
|
||||
import torch
|
||||
import torchaudio
|
||||
|
||||
import paddleaudio
|
||||
|
||||
# Sample audio; downloaded into the working directory only when missing.
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
_wav_name = os.path.basename(wav_url)
if not os.path.isfile(_wav_name):
    urllib.request.urlretrieve(wav_url, _wav_name)

# The loaded samples are also wrapped as a paddle tensor and a torch tensor,
# each with an extra leading dimension of size 1.
waveform, sr = paddleaudio.load(os.path.abspath(_wav_name))
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)

# Feature conf
mel_conf = dict(sr=sr, n_fft=512, hop_length=128, n_mels=40)
# MFCC settings are the two MFCC-specific entries plus every mel setting.
mfcc_conf = {'n_mfcc': 20, 'top_db': 80.0, **mel_conf}

mel_conf_torchaudio = dict(
    sample_rate=sr,
    n_fft=512,
    hop_length=128,
    n_mels=40,
    norm='slaney',
    mel_scale='slaney',
)
mfcc_conf_torchaudio = dict(sample_rate=sr, n_mfcc=20)
|
||||
|
||||
|
||||
def enable_cpu_device():
    """Select ``'cpu'`` as the paddle device."""
    paddle.set_device('cpu')
|
||||
|
||||
|
||||
def enable_gpu_device():
    """Select ``'gpu'`` as the paddle device."""
    paddle.set_device('gpu')
|
||||
|
||||
|
||||
# paddleaudio layer under benchmark, configured from the shared MFCC settings.
mfcc_extractor = paddleaudio.features.MFCC(
    f_min=0.0, dtype=waveform_tensor.dtype, **mfcc_conf)


def mfcc():
    """Apply ``mfcc_extractor`` to the sample and drop the leading dim."""
    batched = mfcc_extractor(waveform_tensor)
    return batched.squeeze(0)
|
||||
|
||||
|
||||
def test_mfcc_cpu(benchmark):
    """Benchmark paddleaudio MFCC on CPU and compare it with librosa."""
    enable_cpu_device()
    feature_paddleaudio = benchmark(mfcc)
    # Pass the signal as ``y=``: librosa >= 0.10 accepts it by keyword only.
    feature_librosa = librosa.feature.mfcc(y=waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_paddleaudio, decimal=3)
|
||||
|
||||
|
||||
def test_mfcc_gpu(benchmark):
    """Benchmark paddleaudio MFCC on GPU and compare it with librosa."""
    enable_gpu_device()
    feature_paddleaudio = benchmark(mfcc)
    # Pass the signal as ``y=``: librosa >= 0.10 accepts it by keyword only.
    feature_librosa = librosa.feature.mfcc(y=waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_paddleaudio, decimal=3)
|
||||
|
||||
|
||||
# 'sample_rate' is supplied through mfcc_conf_torchaudio, so it is removed
# from the mel settings before they are forwarded as ``melkwargs``.
del mel_conf_torchaudio['sample_rate']
mfcc_extractor_torchaudio = torchaudio.transforms.MFCC(
    melkwargs=mel_conf_torchaudio, **mfcc_conf_torchaudio)


def mfcc_torchaudio():
    """Apply ``mfcc_extractor_torchaudio`` to the sample and drop the leading dim."""
    batched = mfcc_extractor_torchaudio(waveform_tensor_torch)
    return batched.squeeze(0)
|
||||
|
||||
|
||||
def test_mfcc_cpu_torchaudio(benchmark):
    """Benchmark torchaudio MFCC on CPU and compare it with librosa."""
    global waveform_tensor_torch, mfcc_extractor_torchaudio

    # Rebind the declared globals. The original assigned the moved extractor
    # to an unused local named ``mel_extractor_torchaudio``.
    mfcc_extractor_torchaudio = mfcc_extractor_torchaudio.to('cpu')
    waveform_tensor_torch = waveform_tensor_torch.to('cpu')

    # Named after the library that produced it (was ``feature_paddleaudio``).
    feature_torchaudio = benchmark(mfcc_torchaudio)
    # Pass the signal as ``y=``: librosa >= 0.10 accepts it by keyword only.
    feature_librosa = librosa.feature.mfcc(y=waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio, decimal=3)
|
||||
|
||||
|
||||
def test_mfcc_gpu_torchaudio(benchmark):
    """Benchmark torchaudio MFCC on CUDA and compare it with librosa."""
    global waveform_tensor_torch, mfcc_extractor_torchaudio

    # Rebind the declared globals. The original assigned the moved extractor
    # to an unused local named ``mel_extractor_torchaudio``.
    mfcc_extractor_torchaudio = mfcc_extractor_torchaudio.to('cuda')
    waveform_tensor_torch = waveform_tensor_torch.to('cuda')

    feature_torchaudio = benchmark(mfcc_torchaudio)
    # Pass the signal as ``y=``: librosa >= 0.10 accepts it by keyword only.
    feature_librosa = librosa.feature.mfcc(y=waveform, **mel_conf)
    # The result is copied back to host memory before the numpy comparison.
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio.cpu(), decimal=3)
|
@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
@ -0,0 +1,49 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
import unittest
|
||||
import urllib.request
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
|
||||
from paddleaudio import load
|
||||
|
||||
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
|
||||
|
||||
|
||||
class FeatTest(unittest.TestCase):
    """Base fixture for feature tests.

    ``setUp`` runs the subclass parameter hook, loads the sample waveform
    into ``self.waveform`` / ``self.sr`` and selects the paddle device.
    """

    def setUp(self):
        self.initParmas()
        self.initWavInput()
        self.setUpDevice()

    def setUpDevice(self, device='cpu'):
        """Select ``device`` as the paddle device."""
        paddle.set_device(device)

    def initWavInput(self, url=wav_url):
        """Load the wav behind ``url`` as a float32 array with two dimensions.

        The file is downloaded into the working directory when it is not
        already there. A one-dimensional signal gains a leading axis.
        """
        local_name = os.path.basename(url)
        if not os.path.isfile(local_name):
            urllib.request.urlretrieve(url, local_name)

        self.waveform, self.sr = load(os.path.abspath(local_name))
        # paddlespeech.s2t.transform.spectrogram only supports float32
        self.waveform = self.waveform.astype(np.float32)

        dim = self.waveform.ndim
        assert dim in [1, 2]
        if dim == 1:
            self.waveform = np.expand_dims(self.waveform, 0)

    def initParmas(self):
        """Parameter hook; the base class provides no implementation."""
        raise NotImplementedError
|
@ -0,0 +1,49 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
|
||||
from .base import FeatTest
|
||||
from paddleaudio.functional.window import get_window
|
||||
from paddlespeech.s2t.transform.spectrogram import IStft
|
||||
from paddlespeech.s2t.transform.spectrogram import Stft
|
||||
|
||||
|
||||
class TestIstft(FeatTest):
    """Compare ``paddle.signal.istft`` with the paddlespeech ``IStft`` transform."""

    def initParmas(self):
        self.n_fft, self.hop_length = 512, 128
        self.window_str = 'hann'

    def test_istft(self):
        # Forward transform with paddlespeech provides the shared input.
        forward = Stft(self.n_fft, self.hop_length)
        spectrum = forward(
            self.waveform.T).squeeze(1).T  # (n_fft//2 + 1, n_frames)
        x = paddle.to_tensor(spectrum)

        # Reference reconstruction from paddlespeech.
        inverse = IStft(self.hop_length)
        ps_res = inverse(spectrum.T)

        # Reconstruction under test from paddle.
        window = get_window(
            self.window_str, self.n_fft, dtype=self.waveform.dtype)
        pd_res = paddle.signal.istft(
            x, self.n_fft, self.hop_length, window=window)

        np.testing.assert_array_almost_equal(ps_res, pd_res, decimal=5)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -0,0 +1,81 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
import torch
|
||||
import torchaudio
|
||||
|
||||
import paddleaudio
|
||||
from .base import FeatTest
|
||||
|
||||
|
||||
class TestKaldi(FeatTest):
    """Compare paddleaudio windows and Kaldi-compliance features with torch/torchaudio."""

    def initParmas(self):
        self.window_size = 1024
        self.dtype = 'float32'

    def test_window(self):
        """Hann, Hamming and Povey (hann ** 0.85) windows must match torch."""
        # Resolve the dtype objects by attribute lookup rather than eval().
        torch_dtype = getattr(torch, self.dtype)
        paddle_dtype = getattr(paddle, self.dtype)

        t_hann_window = torch.hann_window(
            self.window_size, periodic=False, dtype=torch_dtype)
        t_hamm_window = torch.hamming_window(
            self.window_size,
            periodic=False,
            alpha=0.54,
            beta=0.46,
            dtype=torch_dtype)
        t_povey_window = torch.hann_window(
            self.window_size, periodic=False, dtype=torch_dtype).pow(0.85)

        p_hann_window = paddleaudio.functional.window.get_window(
            'hann', self.window_size, fftbins=False, dtype=paddle_dtype)
        p_hamm_window = paddleaudio.functional.window.get_window(
            'hamming', self.window_size, fftbins=False, dtype=paddle_dtype)
        p_povey_window = paddleaudio.functional.window.get_window(
            'hann', self.window_size, fftbins=False,
            dtype=paddle_dtype).pow(0.85)

        np.testing.assert_array_almost_equal(t_hann_window, p_hann_window)
        np.testing.assert_array_almost_equal(t_hamm_window, p_hamm_window)
        np.testing.assert_array_almost_equal(t_povey_window, p_povey_window)

    def test_fbank(self):
        """Kaldi-style fbank must agree with torchaudio to 4 decimals."""
        ta_features = torchaudio.compliance.kaldi.fbank(
            torch.from_numpy(self.waveform.astype(self.dtype)))
        pa_features = paddleaudio.compliance.kaldi.fbank(
            paddle.to_tensor(self.waveform.astype(self.dtype)))
        np.testing.assert_array_almost_equal(
            ta_features, pa_features, decimal=4)

    def test_mfcc(self):
        """Kaldi-style MFCC must agree with torchaudio to 4 decimals."""
        ta_features = torchaudio.compliance.kaldi.mfcc(
            torch.from_numpy(self.waveform.astype(self.dtype)))
        pa_features = paddleaudio.compliance.kaldi.mfcc(
            paddle.to_tensor(self.waveform.astype(self.dtype)))
        np.testing.assert_array_almost_equal(
            ta_features, pa_features, decimal=4)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -0,0 +1,281 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import unittest
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import paddle
|
||||
|
||||
import paddleaudio
|
||||
from .base import FeatTest
|
||||
from paddleaudio.functional.window import get_window
|
||||
|
||||
|
||||
class TestLibrosa(FeatTest):
|
||||
def initParmas(self):
    """Set the feature-extraction settings used by the tests in this class."""
    # Framing
    self.n_fft, self.hop_length = 512, 128
    self.window_str, self.pad_mode = 'hann', 'reflect'
    # Mel / MFCC
    self.n_mels, self.n_mfcc = 40, 20
    self.fmin = 0.0
    self.top_db = 80.0
|
||||
|
||||
def test_stft(self):
    """``paddle.signal.stft`` must match ``librosa.core.stft`` to 5 decimals."""
    if self.waveform.ndim == 2:  # (C, T)
        # librosa is given a 1D signal, so the leading axis is dropped.
        self.waveform = self.waveform.squeeze(0)

    expected = librosa.core.stft(
        y=self.waveform,
        n_fft=self.n_fft,
        hop_length=self.hop_length,
        win_length=None,
        window=self.window_str,
        center=True,
        dtype=None,
        pad_mode=self.pad_mode)

    # paddle receives the same signal with a leading axis added, which is
    # squeezed away again from the result.
    batch = paddle.to_tensor(self.waveform).unsqueeze(0)
    win = get_window(self.window_str, self.n_fft, dtype=batch.dtype)
    actual = paddle.signal.stft(
        x=batch,
        n_fft=self.n_fft,
        hop_length=self.hop_length,
        win_length=None,
        window=win,
        center=True,
        pad_mode=self.pad_mode,
        normalized=False,
        onesided=True).squeeze(0)

    np.testing.assert_array_almost_equal(expected, actual, decimal=5)
|
||||
|
||||
def test_istft(self):
    """``paddle.signal.istft`` must match ``librosa.core.istft`` to 5 decimals."""
    if self.waveform.ndim == 2:  # (C, T)
        # librosa is given a 1D signal, so the leading axis is dropped.
        self.waveform = self.waveform.squeeze(0)

    # Get stft result from librosa; it is the input of both inverse calls.
    stft_matrix = librosa.core.stft(
        y=self.waveform,
        n_fft=self.n_fft,
        hop_length=self.hop_length,
        win_length=None,
        window=self.window_str,
        center=True,
        pad_mode=self.pad_mode)

    expected = librosa.core.istft(
        stft_matrix=stft_matrix,
        hop_length=self.hop_length,
        win_length=None,
        window=self.window_str,
        center=True,
        dtype=None,
        length=None)

    batch = paddle.to_tensor(stft_matrix).unsqueeze(0)
    # The window dtype follows the paddle dtype of the real-valued waveform.
    win_dtype = paddle.to_tensor(self.waveform).dtype
    win = get_window(self.window_str, self.n_fft, dtype=win_dtype)
    actual = paddle.signal.istft(
        x=batch,
        n_fft=self.n_fft,
        hop_length=self.hop_length,
        win_length=None,
        window=win,
        center=True,
        normalized=False,
        onesided=True,
        length=None,
        return_complex=False).squeeze(0)

    np.testing.assert_array_almost_equal(expected, actual, decimal=5)
|
||||
|
||||
def test_mel(self):
    # Build the mel filter-bank matrix three ways (librosa,
    # paddleaudio.compliance.librosa, paddleaudio.functional) and require
    # both paddleaudio results to match the librosa one.

    # librosa and the compliance module are called with identically named
    # arguments, so a single keyword set serves both.
    shared_kwargs = dict(
        sr=self.sr,
        n_fft=self.n_fft,
        n_mels=self.n_mels,
        fmin=self.fmin,
        fmax=None,
        htk=False,
        norm='slaney',
        dtype=self.waveform.dtype,
    )
    expected = librosa.filters.mel(**shared_kwargs)
    from_compliance = paddleaudio.compliance.librosa.compute_fbank_matrix(
        **shared_kwargs)

    # The functional API spells the frequency bounds f_min / f_max and is
    # given the dtype the waveform has once converted to a paddle tensor.
    tensor_dtype = paddle.to_tensor(self.waveform).dtype
    from_functional = paddleaudio.functional.compute_fbank_matrix(
        sr=self.sr,
        n_fft=self.n_fft,
        n_mels=self.n_mels,
        f_min=self.fmin,
        f_max=None,
        htk=False,
        norm='slaney',
        dtype=tensor_dtype,
    )

    for candidate in (from_compliance, from_functional):
        np.testing.assert_array_almost_equal(expected, candidate)
|
||||
|
||||
def test_melspect(self):
    # Compare mel spectrograms from paddleaudio.compliance.librosa and the
    # paddleaudio.features layer against librosa.feature.melspectrogram.
    if len(self.waveform.shape) == 2:  # (C, T)
        # 1D input for librosa.feature.melspectrogram
        self.waveform = self.waveform.squeeze(0)
    signal = self.waveform

    # librosa: the reference result.
    expected = librosa.feature.melspectrogram(
        y=signal,
        sr=self.sr,
        n_fft=self.n_fft,
        hop_length=self.hop_length,
        n_mels=self.n_mels,
        fmin=self.fmin,
    )

    # paddleaudio.compliance.librosa, with to_db switched off.
    from_compliance = paddleaudio.compliance.librosa.melspectrogram(
        x=signal,
        sr=self.sr,
        window_size=self.n_fft,
        hop_length=self.hop_length,
        n_mels=self.n_mels,
        fmin=self.fmin,
        to_db=False,
    )

    # paddleaudio.features layer, fed a float64 tensor with a batch dim added.
    batch = paddle.to_tensor(signal, dtype=paddle.float64).unsqueeze(0)
    extractor = paddleaudio.features.MelSpectrogram(
        sr=self.sr,
        n_fft=self.n_fft,
        hop_length=self.hop_length,
        n_mels=self.n_mels,
        f_min=self.fmin,
        dtype=batch.dtype,
    )
    from_layer = extractor(batch).squeeze(0).numpy()

    for candidate in (from_compliance, from_layer):
        np.testing.assert_array_almost_equal(expected, candidate, decimal=5)
|
||||
|
||||
def test_log_melspect(self):
    # Compare log-mel spectrograms from paddleaudio.compliance.librosa and
    # the paddleaudio.features layer against librosa (melspectrogram followed
    # by power_to_db).
    if len(self.waveform.shape) == 2:  # (C, T)
        # 1D input for librosa.feature.melspectrogram
        self.waveform = self.waveform.squeeze(0)
    signal = self.waveform

    # librosa: mel spectrogram, then conversion to dB with top_db disabled.
    mel_power = librosa.feature.melspectrogram(
        y=signal,
        sr=self.sr,
        n_fft=self.n_fft,
        hop_length=self.hop_length,
        n_mels=self.n_mels,
        fmin=self.fmin,
    )
    expected = librosa.power_to_db(mel_power, top_db=None)

    # paddleaudio.compliance.librosa (to_db is left at its default here).
    from_compliance = paddleaudio.compliance.librosa.melspectrogram(
        x=signal,
        sr=self.sr,
        window_size=self.n_fft,
        hop_length=self.hop_length,
        n_mels=self.n_mels,
        fmin=self.fmin,
    )

    # paddleaudio.features layer, fed a float64 tensor with a batch dim added.
    batch = paddle.to_tensor(signal, dtype=paddle.float64).unsqueeze(0)
    extractor = paddleaudio.features.LogMelSpectrogram(
        sr=self.sr,
        n_fft=self.n_fft,
        hop_length=self.hop_length,
        n_mels=self.n_mels,
        f_min=self.fmin,
        dtype=batch.dtype,
    )
    from_layer = extractor(batch).squeeze(0).numpy()

    # The layer output is checked to 4 decimals, the compliance output to 5.
    for candidate, places in ((from_compliance, 5), (from_layer, 4)):
        np.testing.assert_array_almost_equal(
            expected, candidate, decimal=places)
|
||||
|
||||
def test_mfcc(self):
    # Compare MFCCs from paddleaudio.compliance.librosa and the
    # paddleaudio.features layer against librosa.feature.mfcc.
    if len(self.waveform.shape) == 2:  # (C, T)
        # Drop the leading axis so librosa is given a 1D signal.
        self.waveform = self.waveform.squeeze(0)
    signal = self.waveform

    # librosa: the reference result.
    expected = librosa.feature.mfcc(
        y=signal,
        sr=self.sr,
        S=None,
        n_mfcc=self.n_mfcc,
        dct_type=2,
        norm='ortho',
        lifter=0,
        n_fft=self.n_fft,
        hop_length=self.hop_length,
        n_mels=self.n_mels,
        fmin=self.fmin,
    )

    # paddleaudio.compliance.librosa
    from_compliance = paddleaudio.compliance.librosa.mfcc(
        x=signal,
        sr=self.sr,
        n_mfcc=self.n_mfcc,
        dct_type=2,
        norm='ortho',
        lifter=0,
        window_size=self.n_fft,
        hop_length=self.hop_length,
        n_mels=self.n_mels,
        fmin=self.fmin,
        top_db=self.top_db,
    )

    # paddleaudio.features layer, fed a float64 tensor with a batch dim added.
    batch = paddle.to_tensor(signal, dtype=paddle.float64).unsqueeze(0)
    extractor = paddleaudio.features.MFCC(
        sr=self.sr,
        n_mfcc=self.n_mfcc,
        n_fft=self.n_fft,
        hop_length=self.hop_length,
        n_mels=self.n_mels,
        f_min=self.fmin,
        top_db=self.top_db,
        dtype=batch.dtype,
    )
    from_layer = extractor(batch).squeeze(0).numpy()

    for candidate in (from_compliance, from_layer):
        np.testing.assert_array_almost_equal(expected, candidate, decimal=4)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Executed as a script rather than imported: hand over to unittest's runner.
    unittest.main()
|
@ -0,0 +1,50 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
|
||||
import paddleaudio
|
||||
from .base import FeatTest
|
||||
from paddlespeech.s2t.transform.spectrogram import LogMelSpectrogram
|
||||
|
||||
|
||||
class TestLogMelSpectrogram(FeatTest):
    """Cross-check paddleaudio's LogMelSpectrogram layer against paddlespeech.s2t."""

    def initParmas(self):
        # Analysis settings read by test_log_melspect. The misspelt name is
        # kept as-is: presumably FeatTest calls this hook during setup (the
        # base class is not visible here).
        self.n_fft, self.hop_length, self.n_mels = 512, 128, 40

    def test_log_melspect(self):
        # Reference: the paddlespeech.s2t transform applied to the transposed
        # waveform; axis 1 is squeezed away and the result transposed back.
        reference_transform = LogMelSpectrogram(self.sr, self.n_mels,
                                                self.n_fft, self.hop_length)
        ps_res = reference_transform(self.waveform.T).squeeze(1).T

        waveform_tensor = paddle.to_tensor(self.waveform)
        # The paddlespeech.s2t features mix up the magnitude spectrum and the
        # power spectrum. NOTE(review): power=1.0 and the division by 10.0
        # below presumably compensate for that - confirm against paddlespeech.
        pa_layer = paddleaudio.features.LogMelSpectrogram(
            self.sr,
            self.n_fft,
            self.hop_length,
            power=1.0,
            n_mels=self.n_mels,
            f_min=0.0,
        )
        scaled = pa_layer(waveform_tensor) / 10.0
        pa_res = scaled.squeeze(0).numpy()

        np.testing.assert_array_almost_equal(ps_res, pa_res, decimal=5)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Executed as a script rather than imported: hand over to unittest's runner.
    unittest.main()
|
@ -0,0 +1,42 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
|
||||
import paddleaudio
|
||||
from .base import FeatTest
|
||||
from paddlespeech.s2t.transform.spectrogram import Spectrogram
|
||||
|
||||
|
||||
class TestSpectrogram(FeatTest):
    """Cross-check paddleaudio's Spectrogram layer against paddlespeech.s2t."""

    def initParmas(self):
        # Analysis settings read by test_spectrogram. The misspelt name is
        # kept as-is: presumably FeatTest calls this hook during setup (the
        # base class is not visible here).
        self.n_fft, self.hop_length = 512, 128

    def test_spectrogram(self):
        # Reference: the paddlespeech.s2t transform (magnitude) applied to the
        # transposed waveform; axis 1 is squeezed away and the result
        # transposed back.
        reference_transform = Spectrogram(self.n_fft, self.hop_length)
        ps_res = reference_transform(self.waveform.T).squeeze(1).T

        # paddleaudio layer with power=1.0, fed the waveform as a paddle tensor.
        waveform_tensor = paddle.to_tensor(self.waveform)
        pa_layer = paddleaudio.features.Spectrogram(
            self.n_fft, self.hop_length, power=1.0)
        pa_res = pa_layer(waveform_tensor).squeeze(0).numpy()

        np.testing.assert_array_almost_equal(ps_res, pa_res, decimal=5)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Executed as a script rather than imported: hand over to unittest's runner.
    unittest.main()
|
@ -0,0 +1,44 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
|
||||
from .base import FeatTest
|
||||
from paddleaudio.functional.window import get_window
|
||||
from paddlespeech.s2t.transform.spectrogram import Stft
|
||||
|
||||
|
||||
class TestStft(FeatTest):
    """Cross-check paddle.signal.stft against the paddlespeech.s2t Stft transform."""

    def initParmas(self):
        # Analysis settings read by test_stft. The misspelt name is kept
        # as-is: presumably FeatTest calls this hook during setup (the base
        # class is not visible here).
        self.n_fft, self.hop_length = 512, 128
        self.window_str = 'hann'

    def test_stft(self):
        # Reference: the paddlespeech.s2t transform applied to the transposed
        # waveform; axis 1 is squeezed away and the result transposed back.
        reference_transform = Stft(self.n_fft, self.hop_length)
        transformed = reference_transform(self.waveform.T)
        ps_res = transformed.squeeze(1).T  # (n_fft//2 + 1, n_frames)

        # paddle.signal.stft with a window built for the tensor's dtype.
        waveform_tensor = paddle.to_tensor(self.waveform)
        window = get_window(
            self.window_str, self.n_fft, dtype=waveform_tensor.dtype)
        spectrum = paddle.signal.stft(
            waveform_tensor, self.n_fft, self.hop_length, window=window)
        pd_res = spectrum.squeeze(0).numpy()

        np.testing.assert_array_almost_equal(ps_res, pd_res, decimal=5)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Executed as a script rather than imported: hand over to unittest's runner.
    unittest.main()
|
Loading…
Reference in new issue