From f8301e6f89360ce7ef9b2ab1d1ddd395954cff12 Mon Sep 17 00:00:00 2001 From: KP <109694228@qq.com> Date: Tue, 15 Mar 2022 11:04:50 +0800 Subject: [PATCH] Add benchmark. --- paddleaudio/setup.py | 21 ++- paddleaudio/tests/benchmark/README.md | 34 ++--- .../{features.py => log_melspectrogram.py} | 124 +----------------- paddleaudio/tests/benchmark/melspectrogram.py | 108 +++++++++++++++ paddleaudio/tests/benchmark/mfcc.py | 122 +++++++++++++++++ 5 files changed, 265 insertions(+), 144 deletions(-) rename paddleaudio/tests/benchmark/{features.py => log_melspectrogram.py} (52%) create mode 100644 paddleaudio/tests/benchmark/melspectrogram.py create mode 100644 paddleaudio/tests/benchmark/mfcc.py diff --git a/paddleaudio/setup.py b/paddleaudio/setup.py index 59c84e65..b0ccd2eb 100644 --- a/paddleaudio/setup.py +++ b/paddleaudio/setup.py @@ -11,26 +11,37 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import glob +import os + import setuptools from setuptools.command.install import install -from setuptools.command.test import test as TestCommand +from setuptools.command.test import test # set the version here VERSION = '0.2.0' # Inspired by the example at https://pytest.org/latest/goodpractises.html -class NoseTestCommand(TestCommand): +class TestCommand(test): def finalize_options(self): - TestCommand.finalize_options(self) + test.finalize_options(self) self.test_args = [] self.test_suite = True + def run(self): + self.run_benchmark() + super(TestCommand, self).run() + def run_tests(self): # Run nose ensuring that argv simulates running nosetests directly import nose nose.run_exit(argv=['nosetests', '-w', 'tests']) + def run_benchmark(self): + for benchmark_item in glob.glob('tests/benchmark/*py'): + os.system(f'pytest {benchmark_item}') + class InstallCommand(install): def run(self): @@ -84,11 +95,11 @@ setuptools.setup( ], setup_requires=[ 'nose', 'librosa==0.8.1', 'soundfile==0.10.3.post1', - 'torchaudio==0.10.2' + 'torchaudio==0.10.2', 'pytest-benchmark' ], cmdclass={ 'install': InstallCommand, - 'test': NoseTestCommand, + 'test': TestCommand, }, ) remove_version_py() diff --git a/paddleaudio/tests/benchmark/README.md b/paddleaudio/tests/benchmark/README.md index b391788b..b9034100 100644 --- a/paddleaudio/tests/benchmark/README.md +++ b/paddleaudio/tests/benchmark/README.md @@ -7,7 +7,7 @@ pip install pytest-benchmark # 2. Run Run the specific script for profiling. ```sh -pytest features.py +pytest melspectrogram.py ``` Result: @@ -17,31 +17,23 @@ platform linux -- Python 3.7.7, pytest-7.0.1, pluggy-1.0.0 benchmark: 3.4.1 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000) rootdir: /ssd3/chenxiaojie06/PaddleSpeech/DeepSpeech/paddleaudio plugins: typeguard-2.12.1, benchmark-3.4.1, anyio-3.5.0 -collected 12 items +collected 4 items -features.py ............ [100%] +melspectrogram.py .... [100%] ----------------------------------------------------------------------------------------------------- benchmark: 12 tests ---------------------------------------------------------------------------------------------------- -Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ -test_melspect_gpu_torchaudio 210.7229 (1.0) 338.5879 (1.0) 217.4949 (1.0) 11.3591 (1.02) 214.0319 (1.0) 8.3707 (1.0) 6;5 4,597.8093 (1.0) 186 1 -test_log_melspect_gpu_torchaudio 375.4422 (1.78) 1,024.8050 (3.03) 387.3589 (1.78) 18.7080 (1.69) 385.2872 (1.80) 9.4259 (1.13) 31;31 2,581.5853 (0.56) 1420 1 -test_mfcc_gpu_torchaudio 422.4107 (2.00) 700.7364 (2.07) 454.9903 (2.09) 47.3926 (4.27) 436.6031 (2.04) 15.4376 (1.84) 159;193 2,197.8493 (0.48) 1078 1 -test_melspect_gpu 819.3776 (3.89) 1,161.9311 (3.43) 900.9168 (4.14) 147.0245 (13.26) 830.7453 (3.88) 115.4500 (13.79) 1;1 1,109.9805 (0.24) 5 1 -test_log_melspect_gpu 1,197.9323 (5.68) 1,280.0004 (3.78) 1,214.0182 (5.58) 11.0918 (1.0) 1,211.6358 (5.66) 10.0820 (1.20) 84;31 823.7109 (0.18) 533 1 -test_mfcc_gpu 1,337.0719 (6.35) 1,601.5675 (4.73) 1,355.4527 (6.23) 26.4458 (2.38) 1,348.6911 (6.30) 13.1410 (1.57) 16;17 737.7609 (0.16) 193 1 -test_melspect_cpu_torchaudio 1,374.8817 (6.52) 3,937.5033 (11.63) 1,574.8930 (7.24) 355.4223 (32.04) 1,409.1432 (6.58) 193.7435 (23.15) 36;49 634.9638 (0.14) 291 1 -test_log_melspect_cpu_torchaudio 1,390.2634 (6.60) 2,121.2976 (6.27) 1,559.3045 (7.17) 220.3090 (19.86) 1,409.4356 (6.59) 349.1524 (41.71) 106;0 641.3116 (0.14) 445 1 -test_mfcc_cpu_torchaudio 1,445.6678 (6.86) 3,801.8432 (11.23) 1,680.8559 (7.73) 395.5443 (35.66) 1,469.8748 (6.87) 305.6149 (36.51) 38;35 594.9350 (0.13) 469 1 -test_melspect_cpu 20,620.2641 (97.85) 20,984.0760 (61.98) 20,721.4942 (95.27) 70.2757 (6.34) 20,717.8025 (96.80) 57.8668 (6.91) 6;2 48.2591 (0.01) 30 1 -test_log_melspect_cpu 21,025.3932 (99.78) 48,894.0198 (144.41) 23,057.7049 (106.01) 5,440.3207 (490.48) 21,190.5045 (99.01) 190.0699 (22.71) 4;9 43.3695 (0.01) 44 1 -test_mfcc_cpu 21,127.2798 (100.26) 45,811.5358 (135.30) 23,176.4022 (106.56) 5,041.0751 (454.49) 21,319.1714 (99.61) 149.0396 (17.80) 5;9 43.1473 (0.01) 44 1 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +-------------------------------------------------------------------------------------------------- benchmark: 4 tests ------------------------------------------------------------------------------------------------- +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_melspect_gpu_torchaudio 202.0765 (1.0) 360.6230 (1.0) 218.1168 (1.0) 16.3022 (1.0) 214.2871 (1.0) 21.8451 (1.0) 40;3 4,584.7001 (1.0) 286 1 +test_melspect_gpu 657.8509 (3.26) 908.0470 (2.52) 724.2545 (3.32) 106.5771 (6.54) 669.9096 (3.13) 113.4719 (5.19) 1;0 1,380.7300 (0.30) 5 1 +test_melspect_cpu_torchaudio 1,247.6053 (6.17) 2,892.5799 (8.02) 1,443.2853 (6.62) 345.3732 (21.19) 1,262.7263 (5.89) 221.6385 (10.15) 56;53 692.8637 (0.15) 399 1 +test_melspect_cpu 20,326.2549 (100.59) 20,607.8682 (57.15) 20,473.4125 (93.86) 63.8654 (3.92) 20,467.0429 (95.51) 68.4294 (3.13) 8;1 48.8438 (0.01) 29 1 +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Legend: Outliers: 1 Standard Deviation from Mean; 1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile. - OPS: Operations Per Second, computed as 1 / Mean - ========================================================================== 12 passed in 26.81s ========================================================================== + OPS: Operations Per Second, computed as 1 / Mean +========================================================================== 4 passed in 21.12s =========================================================================== ``` diff --git a/paddleaudio/tests/benchmark/features.py b/paddleaudio/tests/benchmark/log_melspectrogram.py similarity index 52% rename from paddleaudio/tests/benchmark/features.py rename to paddleaudio/tests/benchmark/log_melspectrogram.py index 30ef6f99..5230acd4 100644 --- a/paddleaudio/tests/benchmark/features.py +++ b/paddleaudio/tests/benchmark/log_melspectrogram.py @@ -37,11 +37,6 @@ mel_conf = { 'hop_length': 128, 'n_mels': 40, } -mfcc_conf = { - 'n_mfcc': 20, - 'top_db': 80.0, -} -mfcc_conf.update(mel_conf) mel_conf_torchaudio = { 'sample_rate': sr, @@ -51,10 +46,6 @@ mel_conf_torchaudio = { 'norm': 'slaney', 'mel_scale': 'slaney', } -mfcc_conf_torchaudio = { - 'sample_rate': sr, - 'n_mfcc': 20, -} def enable_cpu_device(): @@ -65,58 +56,6 @@ def enable_gpu_device(): paddle.set_device('gpu') -mel_extractor = paddleaudio.features.MelSpectrogram( - **mel_conf, f_min=0.0, dtype=waveform_tensor.dtype) - - -def melspectrogram(): - return mel_extractor(waveform_tensor).squeeze(0) - - -def test_melspect_cpu(benchmark): - enable_cpu_device() - feature_paddleaudio = benchmark(melspectrogram) - feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) - np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) - - -def test_melspect_gpu(benchmark): - enable_gpu_device() - feature_paddleaudio = benchmark(melspectrogram) - feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) - np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) - - -mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram( - **mel_conf_torchaudio, f_min=0.0) - - -def melspectrogram_torchaudio(): - return mel_extractor_torchaudio(waveform_tensor_torch).squeeze(0) - - -def test_melspect_cpu_torchaudio(benchmark): - global waveform_tensor_torch, mel_extractor_torchaudio - mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu') - waveform_tensor_torch = waveform_tensor_torch.to('cpu') - feature_paddleaudio = benchmark(melspectrogram_torchaudio) - feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) - np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) - - -def test_melspect_gpu_torchaudio(benchmark): - global waveform_tensor_torch, mel_extractor_torchaudio - mel_extractor_torchaudio = mel_extractor_torchaudio.to('cuda') - waveform_tensor_torch = waveform_tensor_torch.to('cuda') - feature_torchaudio = benchmark(melspectrogram_torchaudio) - feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) - np.testing.assert_array_almost_equal( - feature_librosa, feature_torchaudio.cpu(), decimal=3) - - log_mel_extractor = paddleaudio.features.LogMelSpectrogram( **mel_conf, f_min=0.0, top_db=80.0, dtype=waveform_tensor.dtype) @@ -143,9 +82,15 @@ def test_log_melspect_gpu(benchmark): feature_librosa, feature_paddleaudio, decimal=2) +mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram( + **mel_conf_torchaudio, f_min=0.0) amplitude_to_DB = torchaudio.transforms.AmplitudeToDB('power', top_db=80.0) +def melspectrogram_torchaudio(): + return mel_extractor_torchaudio(waveform_tensor_torch).squeeze(0) + + def log_melspectrogram_torchaudio(): mel_specgram = mel_extractor_torchaudio(waveform_tensor_torch) return amplitude_to_DB(mel_specgram).squeeze(0) @@ -177,60 +122,3 @@ def test_log_melspect_gpu_torchaudio(benchmark): feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0) np.testing.assert_array_almost_equal( feature_librosa, feature_torchaudio.cpu(), decimal=2) - - -mfcc_extractor = paddleaudio.features.MFCC( - **mfcc_conf, f_min=0.0, dtype=waveform_tensor.dtype) - - -def mfcc(): - return mfcc_extractor(waveform_tensor).squeeze(0) - - -def test_mfcc_cpu(benchmark): - enable_cpu_device() - feature_paddleaudio = benchmark(mfcc) - feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) - np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) - - -def test_mfcc_gpu(benchmark): - enable_gpu_device() - feature_paddleaudio = benchmark(mfcc) - feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) - np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) - - -del mel_conf_torchaudio['sample_rate'] -mfcc_extractor_torchaudio = torchaudio.transforms.MFCC( - **mfcc_conf_torchaudio, melkwargs=mel_conf_torchaudio) - - -def mfcc_torchaudio(): - return mfcc_extractor_torchaudio(waveform_tensor_torch).squeeze(0) - - -def test_mfcc_cpu_torchaudio(benchmark): - global waveform_tensor_torch, mfcc_extractor_torchaudio - - mel_extractor_torchaudio = mfcc_extractor_torchaudio.to('cpu') - waveform_tensor_torch = waveform_tensor_torch.to('cpu') - - feature_paddleaudio = benchmark(mfcc_torchaudio) - feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) - np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) - - -def test_mfcc_gpu_torchaudio(benchmark): - global waveform_tensor_torch, mfcc_extractor_torchaudio - - mel_extractor_torchaudio = mfcc_extractor_torchaudio.to('cuda') - waveform_tensor_torch = waveform_tensor_torch.to('cuda') - - feature_torchaudio = benchmark(mfcc_torchaudio) - feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) - np.testing.assert_array_almost_equal( - feature_librosa, feature_torchaudio.cpu(), decimal=3) diff --git a/paddleaudio/tests/benchmark/melspectrogram.py b/paddleaudio/tests/benchmark/melspectrogram.py new file mode 100644 index 00000000..e0b79b45 --- /dev/null +++ b/paddleaudio/tests/benchmark/melspectrogram.py @@ -0,0 +1,108 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import urllib.request + +import librosa +import numpy as np +import paddle +import torch +import torchaudio + +import paddleaudio + +wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' +if not os.path.isfile(os.path.basename(wav_url)): + urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) + +waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url))) +waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) +waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) + +# Feature conf +mel_conf = { + 'sr': sr, + 'n_fft': 512, + 'hop_length': 128, + 'n_mels': 40, +} + +mel_conf_torchaudio = { + 'sample_rate': sr, + 'n_fft': 512, + 'hop_length': 128, + 'n_mels': 40, + 'norm': 'slaney', + 'mel_scale': 'slaney', +} + + +def enable_cpu_device(): + paddle.set_device('cpu') + + +def enable_gpu_device(): + paddle.set_device('gpu') + + +mel_extractor = paddleaudio.features.MelSpectrogram( + **mel_conf, f_min=0.0, dtype=waveform_tensor.dtype) + + +def melspectrogram(): + return mel_extractor(waveform_tensor).squeeze(0) + + +def test_melspect_cpu(benchmark): + enable_cpu_device() + feature_paddleaudio = benchmark(melspectrogram) + feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) + np.testing.assert_array_almost_equal( + feature_librosa, feature_paddleaudio, decimal=3) + + +def test_melspect_gpu(benchmark): + enable_gpu_device() + feature_paddleaudio = benchmark(melspectrogram) + feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) + np.testing.assert_array_almost_equal( + feature_librosa, feature_paddleaudio, decimal=3) + + +mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram( + **mel_conf_torchaudio, f_min=0.0) + + +def melspectrogram_torchaudio(): + return mel_extractor_torchaudio(waveform_tensor_torch).squeeze(0) + + +def test_melspect_cpu_torchaudio(benchmark): + global waveform_tensor_torch, mel_extractor_torchaudio + mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu') + waveform_tensor_torch = waveform_tensor_torch.to('cpu') + feature_paddleaudio = benchmark(melspectrogram_torchaudio) + feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) + np.testing.assert_array_almost_equal( + feature_librosa, feature_paddleaudio, decimal=3) + + +def test_melspect_gpu_torchaudio(benchmark): + global waveform_tensor_torch, mel_extractor_torchaudio + mel_extractor_torchaudio = mel_extractor_torchaudio.to('cuda') + waveform_tensor_torch = waveform_tensor_torch.to('cuda') + feature_torchaudio = benchmark(melspectrogram_torchaudio) + feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) + np.testing.assert_array_almost_equal( + feature_librosa, feature_torchaudio.cpu(), decimal=3) diff --git a/paddleaudio/tests/benchmark/mfcc.py b/paddleaudio/tests/benchmark/mfcc.py new file mode 100644 index 00000000..2572ff33 --- /dev/null +++ b/paddleaudio/tests/benchmark/mfcc.py @@ -0,0 +1,122 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import urllib.request + +import librosa +import numpy as np +import paddle +import torch +import torchaudio + +import paddleaudio + +wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' +if not os.path.isfile(os.path.basename(wav_url)): + urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) + +waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url))) +waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) +waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) + +# Feature conf +mel_conf = { + 'sr': sr, + 'n_fft': 512, + 'hop_length': 128, + 'n_mels': 40, +} +mfcc_conf = { + 'n_mfcc': 20, + 'top_db': 80.0, +} +mfcc_conf.update(mel_conf) + +mel_conf_torchaudio = { + 'sample_rate': sr, + 'n_fft': 512, + 'hop_length': 128, + 'n_mels': 40, + 'norm': 'slaney', + 'mel_scale': 'slaney', +} +mfcc_conf_torchaudio = { + 'sample_rate': sr, + 'n_mfcc': 20, +} + + +def enable_cpu_device(): + paddle.set_device('cpu') + + +def enable_gpu_device(): + paddle.set_device('gpu') + + +mfcc_extractor = paddleaudio.features.MFCC( + **mfcc_conf, f_min=0.0, dtype=waveform_tensor.dtype) + + +def mfcc(): + return mfcc_extractor(waveform_tensor).squeeze(0) + + +def test_mfcc_cpu(benchmark): + enable_cpu_device() + feature_paddleaudio = benchmark(mfcc) + feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) + np.testing.assert_array_almost_equal( + feature_librosa, feature_paddleaudio, decimal=3) + + +def test_mfcc_gpu(benchmark): + enable_gpu_device() + feature_paddleaudio = benchmark(mfcc) + feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) + np.testing.assert_array_almost_equal( + feature_librosa, feature_paddleaudio, decimal=3) + + +del mel_conf_torchaudio['sample_rate'] +mfcc_extractor_torchaudio = torchaudio.transforms.MFCC( + **mfcc_conf_torchaudio, melkwargs=mel_conf_torchaudio) + + +def mfcc_torchaudio(): + return mfcc_extractor_torchaudio(waveform_tensor_torch).squeeze(0) + + +def test_mfcc_cpu_torchaudio(benchmark): + global waveform_tensor_torch, mfcc_extractor_torchaudio + + mel_extractor_torchaudio = mfcc_extractor_torchaudio.to('cpu') + waveform_tensor_torch = waveform_tensor_torch.to('cpu') + + feature_paddleaudio = benchmark(mfcc_torchaudio) + feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) + np.testing.assert_array_almost_equal( + feature_librosa, feature_paddleaudio, decimal=3) + + +def test_mfcc_gpu_torchaudio(benchmark): + global waveform_tensor_torch, mfcc_extractor_torchaudio + + mel_extractor_torchaudio = mfcc_extractor_torchaudio.to('cuda') + waveform_tensor_torch = waveform_tensor_torch.to('cuda') + + feature_torchaudio = benchmark(mfcc_torchaudio) + feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) + np.testing.assert_array_almost_equal( + feature_librosa, feature_torchaudio.cpu(), decimal=3)