You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
PaddleSpeech/audio/tests/backends/sox_io/save_test.py

189 lines
6.5 KiB

import io
import platform
import unittest
if platform.system() == "Windows":
import warnings
warnings.warn("sox io not support in Windows, please skip test.")
exit()
import numpy as np
from paddleaudio.backends import sox_io_backend
from common_utils import (get_wav_data, load_wav, save_wav, nested_params,
TempDirMixin, sox_utils)
#code is from:https://github.com/pytorch/audio/blob/main/torchaudio/test/torchaudio_unittest/backend/sox_io/save_test.py
def _get_sox_encoding(encoding):
encodings = {
"PCM_F": "floating-point",
"PCM_S": "signed-integer",
"PCM_U": "unsigned-integer",
"ULAW": "u-law",
"ALAW": "a-law",
}
return encodings.get(encoding)
class TestSaveBase(TempDirMixin):
def assert_save_consistency(
self,
format: str,
*,
compression: float=None,
encoding: str=None,
bits_per_sample: int=None,
sample_rate: float=8000,
num_channels: int=2,
num_frames: float=3 * 8000,
src_dtype: str="int32",
test_mode: str="path", ):
"""`save` function produces file that is comparable with `sox` command
To compare that the file produced by `save` function agains the file produced by
the equivalent `sox` command, we need to load both files.
But there are many formats that cannot be opened with common Python modules (like
SciPy).
So we use `sox` command to prepare the original data and convert the saved files
into a format that SciPy can read (PCM wav).
The following diagram illustrates this process. The difference is 2.1. and 3.1.
This assumes that
- loading data with SciPy preserves the data well.
- converting the resulting files into WAV format with `sox` preserve the data well.
x
| 1. Generate source wav file with SciPy
|
v
-------------- wav ----------------
| |
| 2.1. load with scipy | 3.1. Convert to the target
| then save it into the target | format depth with sox
| format with paddleaudio |
v v
target format target format
| |
| 2.2. Convert to wav with sox | 3.2. Convert to wav with sox
| |
v v
wav wav
| |
| 2.3. load with scipy | 3.3. load with scipy
| |
v v
tensor -------> compare <--------- tensor
"""
cmp_encoding = "floating-point"
cmp_bit_depth = 32
src_path = self.get_temp_path("1.source.wav")
tgt_path = self.get_temp_path(f"2.1.paddleaudio.{format}")
tst_path = self.get_temp_path("2.2.result.wav")
sox_path = self.get_temp_path(f"3.1.sox.{format}")
ref_path = self.get_temp_path("3.2.ref.wav")
# 1. Generate original wav
data = get_wav_data(
src_dtype, num_channels, normalize=False, num_frames=num_frames)
save_wav(src_path, data, sample_rate)
# 2.1. Convert the original wav to target format with paddleaudio
data = load_wav(src_path, normalize=False)[0]
if test_mode == "path":
sox_io_backend.save(
tgt_path,
data,
sample_rate,
compression=compression,
encoding=encoding,
bits_per_sample=bits_per_sample)
elif test_mode == "fileobj":
with open(tgt_path, "bw") as file_:
sox_io_backend.save(
file_,
data,
sample_rate,
format=format,
compression=compression,
encoding=encoding,
bits_per_sample=bits_per_sample, )
elif test_mode == "bytesio":
file_ = io.BytesIO()
sox_io_backend.save(
file_,
data,
sample_rate,
format=format,
compression=compression,
encoding=encoding,
bits_per_sample=bits_per_sample, )
file_.seek(0)
with open(tgt_path, "bw") as f:
f.write(file_.read())
else:
raise ValueError(f"Unexpected test mode: {test_mode}")
# 2.2. Convert the target format to wav with sox
sox_utils.convert_audio_file(
tgt_path, tst_path, encoding=cmp_encoding, bit_depth=cmp_bit_depth)
# 2.3. Load with SciPy
found = load_wav(tst_path, normalize=False)[0]
# 3.1. Convert the original wav to target format with sox
sox_encoding = _get_sox_encoding(encoding)
sox_utils.convert_audio_file(
src_path,
sox_path,
compression=compression,
encoding=sox_encoding,
bit_depth=bits_per_sample)
# 3.2. Convert the target format to wav with sox
sox_utils.convert_audio_file(
sox_path, ref_path, encoding=cmp_encoding, bit_depth=cmp_bit_depth)
# 3.3. Load with SciPy
expected = load_wav(ref_path, normalize=False)[0]
np.testing.assert_array_almost_equal(found, expected)
class TestSave(TestSaveBase, unittest.TestCase):
@nested_params(
[
"path",
],
[
("PCM_U", 8),
("PCM_S", 16),
("PCM_S", 32),
("PCM_F", 32),
("PCM_F", 64),
("ULAW", 8),
("ALAW", 8),
], )
def test_save_wav(self, test_mode, enc_params):
encoding, bits_per_sample = enc_params
self.assert_save_consistency(
"wav",
encoding=encoding,
bits_per_sample=bits_per_sample,
test_mode=test_mode)
@nested_params(
[
"path",
],
[
("float32", ),
("int32", ),
], )
def test_save_wav_dtype(self, test_mode, params):
(dtype, ) = params
self.assert_save_consistency(
"wav", src_dtype=dtype, test_mode=test_mode)
if __name__ == '__main__':
unittest.main()