From c3e0a8dd7a6a64fde39d4214f90e194d7947d39b Mon Sep 17 00:00:00 2001
From: KP <109694228@qq.com>
Date: Fri, 11 Mar 2022 19:49:37 +0800
Subject: [PATCH] Add benchmark.

---
 paddleaudio/tests/benchmark/README.md   |  33 +++---
 paddleaudio/tests/benchmark/features.py | 145 +++++++++++++++++++++---
 2 files changed, 152 insertions(+), 26 deletions(-)

diff --git a/paddleaudio/tests/benchmark/README.md b/paddleaudio/tests/benchmark/README.md
index 9655632d..b391788b 100644
--- a/paddleaudio/tests/benchmark/README.md
+++ b/paddleaudio/tests/benchmark/README.md
@@ -17,24 +17,31 @@ platform linux -- Python 3.7.7, pytest-7.0.1, pluggy-1.0.0
 benchmark: 3.4.1 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
 rootdir: /ssd3/chenxiaojie06/PaddleSpeech/DeepSpeech/paddleaudio
 plugins: typeguard-2.12.1, benchmark-3.4.1, anyio-3.5.0
-collected 6 items
+collected 12 items
 
-features.py ......                                                                                                                                                [100%]
+features.py ............                                                                                                                                          [100%]
 
 
-------------------------------------------------------------------------------------------------- benchmark: 6 tests ------------------------------------------------------------------------------------------------
-Name (time in us)                 Min                    Max                   Mean                StdDev                 Median                    IQR            Outliers         OPS            Rounds  Iterations
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-test_melspect_gpu            632.2041 (1.0)         898.7449 (1.0)         709.3824 (1.0)        109.7022 (6.91)        676.1923 (1.0)         115.2642 (22.19)         1;0  1,409.6768 (1.0)           5           1
-test_log_melspect_gpu        912.9159 (1.44)      1,222.0535 (1.36)        931.2489 (1.31)        34.4270 (2.17)        924.9896 (1.37)          5.1949 (1.0)          4;13  1,073.8268 (0.76)         82           1
-test_mfcc_gpu              1,244.8374 (1.97)      1,321.3232 (1.47)      1,262.1319 (1.78)        15.8698 (1.0)       1,258.3155 (1.86)         14.1086 (2.72)         17;9    792.3102 (0.56)         91           1
-test_melspect_cpu         19,106.5744 (30.22)    46,194.2125 (51.40)    27,458.7850 (38.71)    9,786.1071 (616.65)   23,830.0692 (35.24)    14,344.4724 (>1000.0)       3;0     36.4182 (0.03)         14           1
-test_log_melspect_cpu     19,513.7132 (30.87)    20,367.2443 (22.66)    19,765.4018 (27.86)      167.1289 (10.53)    19,750.2729 (29.21)       188.9346 (36.37)        16;1     50.5935 (0.04)         49           1
-test_mfcc_cpu             19,881.3528 (31.45)    20,427.2158 (22.73)    20,104.6574 (28.34)      129.5621 (8.16)     20,075.8977 (29.69)       150.9022 (29.05)        12;2     49.7397 (0.04)         48           1
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+---------------------------------------------------------------------------------------------------- benchmark: 12 tests ----------------------------------------------------------------------------------------------------
+Name (time in us)                            Min                    Max                   Mean                StdDev                 Median                 IQR            Outliers         OPS            Rounds  Iterations
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+test_melspect_gpu_torchaudio            210.7229 (1.0)         338.5879 (1.0)         217.4949 (1.0)         11.3591 (1.02)        214.0319 (1.0)        8.3707 (1.0)           6;5  4,597.8093 (1.0)         186           1
+test_log_melspect_gpu_torchaudio        375.4422 (1.78)      1,024.8050 (3.03)        387.3589 (1.78)        18.7080 (1.69)        385.2872 (1.80)       9.4259 (1.13)        31;31  2,581.5853 (0.56)       1420           1
+test_mfcc_gpu_torchaudio                422.4107 (2.00)        700.7364 (2.07)        454.9903 (2.09)        47.3926 (4.27)        436.6031 (2.04)      15.4376 (1.84)      159;193  2,197.8493 (0.48)       1078           1
+test_melspect_gpu                       819.3776 (3.89)      1,161.9311 (3.43)        900.9168 (4.14)       147.0245 (13.26)       830.7453 (3.88)     115.4500 (13.79)         1;1  1,109.9805 (0.24)          5           1
+test_log_melspect_gpu                 1,197.9323 (5.68)      1,280.0004 (3.78)      1,214.0182 (5.58)        11.0918 (1.0)       1,211.6358 (5.66)      10.0820 (1.20)        84;31    823.7109 (0.18)        533           1
+test_mfcc_gpu                         1,337.0719 (6.35)      1,601.5675 (4.73)      1,355.4527 (6.23)        26.4458 (2.38)      1,348.6911 (6.30)      13.1410 (1.57)        16;17    737.7609 (0.16)        193           1
+test_melspect_cpu_torchaudio          1,374.8817 (6.52)      3,937.5033 (11.63)     1,574.8930 (7.24)       355.4223 (32.04)     1,409.1432 (6.58)     193.7435 (23.15)       36;49    634.9638 (0.14)        291           1
+test_log_melspect_cpu_torchaudio      1,390.2634 (6.60)      2,121.2976 (6.27)      1,559.3045 (7.17)       220.3090 (19.86)     1,409.4356 (6.59)     349.1524 (41.71)       106;0    641.3116 (0.14)        445           1
+test_mfcc_cpu_torchaudio              1,445.6678 (6.86)      3,801.8432 (11.23)     1,680.8559 (7.73)       395.5443 (35.66)     1,469.8748 (6.87)     305.6149 (36.51)       38;35    594.9350 (0.13)        469           1
+test_melspect_cpu                    20,620.2641 (97.85)    20,984.0760 (61.98)    20,721.4942 (95.27)       70.2757 (6.34)     20,717.8025 (96.80)     57.8668 (6.91)          6;2     48.2591 (0.01)         30           1
+test_log_melspect_cpu                21,025.3932 (99.78)    48,894.0198 (144.41)   23,057.7049 (106.01)   5,440.3207 (490.48)   21,190.5045 (99.01)    190.0699 (22.71)         4;9     43.3695 (0.01)         44           1
+test_mfcc_cpu                        21,127.2798 (100.26)   45,811.5358 (135.30)   23,176.4022 (106.56)   5,041.0751 (454.49)   21,319.1714 (99.61)    149.0396 (17.80)         5;9     43.1473 (0.01)         44           1
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 
 Legend:
   Outliers: 1 Standard Deviation from Mean; 1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile.
     OPS: Operations Per Second, computed as 1 / Mean
-    ========================================================================== 6 passed in 20.51s ===========================================================================
+    ========================================================================== 12 passed in 26.81s ==========================================================================
+
 ```
diff --git a/paddleaudio/tests/benchmark/features.py b/paddleaudio/tests/benchmark/features.py
index 67bec9e6..30ef6f99 100644
--- a/paddleaudio/tests/benchmark/features.py
+++ b/paddleaudio/tests/benchmark/features.py
@@ -11,15 +11,28 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+import urllib.request
+
 import librosa
 import numpy as np
 import paddle
+import torch
+import torchaudio
 
 import paddleaudio
 
+wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
+if not os.path.isfile(os.path.basename(wav_url)):  # download only if absent
+    urllib.request.urlretrieve(wav_url, os.path.basename(wav_url))
+
+waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url)))
+waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)  # paddle input
+waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)  # torch input
+
 # Feature conf
 mel_conf = {
-    'sr': 16000,
+    'sr': sr,
     'n_fft': 512,
     'hop_length': 128,
     'n_mels': 40,
@@ -30,9 +43,18 @@ mfcc_conf = {
 }
 mfcc_conf.update(mel_conf)
 
-input_shape = (48000)
-waveform = np.random.random(size=input_shape)
-waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
+mel_conf_torchaudio = {  # same sr/n_fft/hop_length/n_mels as mel_conf
+    'sample_rate': sr,  # deleted again before the MFCC extractor is built
+    'n_fft': 512,
+    'hop_length': 128,
+    'n_mels': 40,
+    'norm': 'slaney',
+    'mel_scale': 'slaney',  # NOTE(review): presumably matches librosa - confirm
+}
+mfcc_conf_torchaudio = {
+    'sample_rate': sr,
+    'n_mfcc': 20,
+}
 
 
 def enable_cpu_device():
@@ -56,7 +78,7 @@ def test_melspect_cpu(benchmark):
     feature_paddleaudio = benchmark(melspectrogram)
     feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
     np.testing.assert_array_almost_equal(
-        feature_librosa, feature_paddleaudio, decimal=4)
+        feature_librosa, feature_paddleaudio, decimal=3)
 
 
 def test_melspect_gpu(benchmark):
@@ -64,11 +86,39 @@ def test_melspect_gpu(benchmark):
     feature_paddleaudio = benchmark(melspectrogram)
     feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
     np.testing.assert_array_almost_equal(
-        feature_librosa, feature_paddleaudio, decimal=4)
+        feature_librosa, feature_paddleaudio, decimal=3)
+
+
+mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram(
+    **mel_conf_torchaudio, f_min=0.0)  # shared by the mel and log-mel tests
+
+
+def melspectrogram_torchaudio():  # timed body; leading axis is squeezed out
+    return mel_extractor_torchaudio(waveform_tensor_torch).squeeze(0)
+
+
+def test_melspect_cpu_torchaudio(benchmark):
+    global waveform_tensor_torch, mel_extractor_torchaudio
+    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu')
+    waveform_tensor_torch = waveform_tensor_torch.to('cpu')
+    feature_torchaudio = benchmark(melspectrogram_torchaudio)  # timed call
+    feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
+    np.testing.assert_array_almost_equal(
+        feature_librosa, feature_torchaudio, decimal=3)
+
+
+def test_melspect_gpu_torchaudio(benchmark):
+    global waveform_tensor_torch, mel_extractor_torchaudio
+    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cuda')
+    waveform_tensor_torch = waveform_tensor_torch.to('cuda')
+    feature_torchaudio = benchmark(melspectrogram_torchaudio)  # timed call
+    feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
+    np.testing.assert_array_almost_equal(
+        feature_librosa, feature_torchaudio.cpu(), decimal=3)  # back to host
 
 
 log_mel_extractor = paddleaudio.features.LogMelSpectrogram(
-    **mel_conf, f_min=0.0, dtype=waveform_tensor.dtype)
+    **mel_conf, f_min=0.0, top_db=80.0, dtype=waveform_tensor.dtype)
 
 
 def log_melspectrogram():
@@ -79,18 +129,54 @@ def test_log_melspect_cpu(benchmark):
     enable_cpu_device()
     feature_paddleaudio = benchmark(log_melspectrogram)
     feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
-    feature_librosa = librosa.power_to_db(feature_librosa, top_db=None)
+    feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
     np.testing.assert_array_almost_equal(
-        feature_librosa, feature_paddleaudio, decimal=4)
+        feature_librosa, feature_paddleaudio, decimal=3)
 
 
 def test_log_melspect_gpu(benchmark):
     enable_gpu_device()
     feature_paddleaudio = benchmark(log_melspectrogram)
     feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
-    feature_librosa = librosa.power_to_db(feature_librosa, top_db=None)
+    feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
+    np.testing.assert_array_almost_equal(
+        feature_librosa, feature_paddleaudio, decimal=2)
+
+
+amplitude_to_DB = torchaudio.transforms.AmplitudeToDB('power', top_db=80.0)  # same top_db as the librosa reference
+
+
+def log_melspectrogram_torchaudio():  # timed body: mel spectrogram, then dB
+    mel_specgram = mel_extractor_torchaudio(waveform_tensor_torch)
+    return amplitude_to_DB(mel_specgram).squeeze(0)
+
+
+def test_log_melspect_cpu_torchaudio(benchmark):
+    """Benchmark torchaudio log-mel on CPU and compare with librosa."""
+    global waveform_tensor_torch, mel_extractor_torchaudio, amplitude_to_DB
+    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu')
+    waveform_tensor_torch = waveform_tensor_torch.to('cpu')
+    amplitude_to_DB = amplitude_to_DB.to('cpu')
+    # Only the torchaudio call is timed; librosa provides the reference.
+    feature_torchaudio = benchmark(log_melspectrogram_torchaudio)
+    feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
+    feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
+    np.testing.assert_array_almost_equal(
+        feature_librosa, feature_torchaudio, decimal=3)
+
+
+def test_log_melspect_gpu_torchaudio(benchmark):
+    global waveform_tensor_torch, mel_extractor_torchaudio, amplitude_to_DB
+    # Move both transforms and the input tensor to the GPU before timing.
+    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cuda')
+    waveform_tensor_torch = waveform_tensor_torch.to('cuda')
+    amplitude_to_DB = amplitude_to_DB.to('cuda')
+    # Only the torchaudio call is timed; librosa provides the reference.
+    feature_torchaudio = benchmark(log_melspectrogram_torchaudio)
+    feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
+    feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
     np.testing.assert_array_almost_equal(
-        feature_librosa, feature_paddleaudio, decimal=4)
+        feature_librosa, feature_torchaudio.cpu(), decimal=2)
 
 
 mfcc_extractor = paddleaudio.features.MFCC(
@@ -106,7 +192,7 @@ def test_mfcc_cpu(benchmark):
     feature_paddleaudio = benchmark(mfcc)
     feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
     np.testing.assert_array_almost_equal(
-        feature_librosa, feature_paddleaudio, decimal=4)
+        feature_librosa, feature_paddleaudio, decimal=3)
 
 
 def test_mfcc_gpu(benchmark):
@@ -114,4 +200,37 @@ def test_mfcc_gpu(benchmark):
     feature_paddleaudio = benchmark(mfcc)
     feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
     np.testing.assert_array_almost_equal(
-        feature_librosa, feature_paddleaudio, decimal=4)
+        feature_librosa, feature_paddleaudio, decimal=3)
+
+
+del mel_conf_torchaudio['sample_rate']  # sample_rate comes from mfcc_conf_torchaudio
+mfcc_extractor_torchaudio = torchaudio.transforms.MFCC(
+    **mfcc_conf_torchaudio, melkwargs=mel_conf_torchaudio)
+
+
+def mfcc_torchaudio():  # timed body; leading axis is squeezed out
+    return mfcc_extractor_torchaudio(waveform_tensor_torch).squeeze(0)
+
+
+def test_mfcc_cpu_torchaudio(benchmark):
+    """Benchmark torchaudio MFCC on CPU and compare with librosa."""
+    global waveform_tensor_torch, mfcc_extractor_torchaudio
+    mfcc_extractor_torchaudio = mfcc_extractor_torchaudio.to('cpu')
+    waveform_tensor_torch = waveform_tensor_torch.to('cpu')
+    # Only the torchaudio call is timed; librosa provides the reference.
+    feature_torchaudio = benchmark(mfcc_torchaudio)
+    feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
+    np.testing.assert_array_almost_equal(
+        feature_librosa, feature_torchaudio, decimal=3)
+
+
+def test_mfcc_gpu_torchaudio(benchmark):
+    """Benchmark torchaudio MFCC on the GPU and compare with librosa."""
+    global waveform_tensor_torch, mfcc_extractor_torchaudio
+    mfcc_extractor_torchaudio = mfcc_extractor_torchaudio.to('cuda')
+    waveform_tensor_torch = waveform_tensor_torch.to('cuda')
+    # Only the torchaudio call is timed; the result is copied back to host.
+    feature_torchaudio = benchmark(mfcc_torchaudio)
+    feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
+    np.testing.assert_array_almost_equal(
+        feature_librosa, feature_torchaudio.cpu(), decimal=3)