modify info test

3 years ago · a55592f7c2
parent 7abfad804e
commit a55592f7c2
5 changed files with 289 additions and 30 deletions
--- a/paddlespeech/audio/backends/sox_io_backend.py
+++ b/paddlespeech/audio/backends/sox_io_backend.py
@ -88,7 +88,7 @@ def save(filepath: str,
    )
@_mod_utils.requires_sox()
-def info(filepath: str, format: Optional[str] = "") -> None:
+def info(filepath: str, format: Optional[str] = None,) -> AudioMetaData:
    if hasattr(filepath, "read"):
        sinfo = paddleaudio.get_info_fileobj(filepath, format)
        if sinfo is not None:
--- a/paddlespeech/audio/src/pybind/sox/io.cpp
+++ b/paddlespeech/audio/src/pybind/sox/io.cpp
@ -13,13 +13,14 @@ using namespace paddleaudio::sox_utils;
 namespace paddleaudio {
 namespace sox_io {
-auto get_info_file(const std::string &path, const std::string &format)
+auto get_info_file(const std::string &path, 
                   const tl::optional<std::string> &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
    SoxFormat sf(
        sox_open_read(path.data(),
                      /*signal=*/nullptr,
                      /*encoding=*/nullptr,
-                      /*filetype=*/format.empty() ? nullptr : format.data()));
+                      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
    validate_input_file(sf, path);
@ -61,7 +62,8 @@ std::vector<std::vector<std::string>> get_effects(
  return effects;
 }
-auto get_info_fileobj(py::object fileobj, const std::string &format)
+auto get_info_fileobj(py::object fileobj, 
                      const tl::optional<std::string> &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
    const auto capacity = [&]() {
        const auto bufsiz = get_buffer_size();
@ -80,7 +82,7 @@ auto get_info_fileobj(py::object fileobj, const std::string &format)
        buf_size,
        /*signal=*/nullptr,
        /*encoding=*/nullptr,
-        /*filetype=*/format.empty() ? nullptr : format.data()));
+        /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
    // In case of streamed data, length can be 0
    validate_input_memfile(sf);
--- a/paddlespeech/audio/src/pybind/sox/io.h
+++ b/paddlespeech/audio/src/pybind/sox/io.h
@ -10,10 +10,12 @@ namespace py = pybind11;
 namespace paddleaudio {
 namespace sox_io {
-auto get_info_file(const std::string &path, const std::string &format)
+auto get_info_file(const std::string &path, 
                   const tl::optional<std::string> &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;
-auto get_info_fileobj(py::object fileobj, const std::string &format)
+auto get_info_fileobj(py::object fileobj,
                   const tl::optional<std::string> &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;
 tl::optional<std::tuple<py::array, int64_t>> load_audio_fileobj(
--- a/tests/unit/audio/backends/sox_io/info_test.py
+++ b/tests/unit/audio/backends/sox_io/info_test.py
@ -1,34 +1,290 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import unittest
 import itertools
 import tarfile
 from contextlib import contextmanager
 import numpy as np
 import paddle
 import os
 import io
 from parameterized import parameterized
 from paddlespeech.audio.backends import sox_io_backend
-class TestInfo(unittest.TestCase):
+from tests.unit.common_utils import (
    get_wav_data,
    load_wav,
    save_wav,
    TempDirMixin,
    sox_utils,
    data_utils
 )
-    def test_wav(self, dtype, sample_rate, num_channels, sample_size):
+from common import get_encoding, get_bits_per_sample
-        """check wav file correctly """
+
-        path = 'testdata/test.wav'
+#code is from:https://github.com/pytorch/audio/blob/main/torchaudio/test/torchaudio_unittest/backend/sox_io/info_test.py
-        info = sox_io_backend.get_info_file(path)
+
 class TestInfo(TempDirMixin, unittest.TestCase):
    @parameterized.expand(
        list(
            itertools.product(
                ["float32", "int32",],
                [8000, 16000],
                [1, 2],
            )
        ),
    )
    def test_wav(self, dtype, sample_rate, num_channels):
        """`sox_io_backend.info` can check wav file correctly"""
        duration = 1
        path = self.get_temp_path("data.wav")
        data = get_wav_data(dtype, num_channels, normalize=False, num_frames=duration * sample_rate)
        save_wav(path, data, sample_rate)
        info = sox_io_backend.info(path)
        assert info.sample_rate == sample_rate
        assert info.num_frames == sample_rate * duration
        assert info.num_channels == num_channels
        assert info.bits_per_sample == sox_utils.get_bit_depth(dtype)
        assert info.encoding == get_encoding("wav", dtype)
    @parameterized.expand(
        list(
            itertools.product(
                ["float32", "int32"],
                [8000, 16000],
                [4, 8, 16, 32],
            )
        ),
    )
    def test_wav_multiple_channels(self, dtype, sample_rate, num_channels):
        """`sox_io_backend.info` can check wav file with channels more than 2 correctly"""
        duration = 1
        path = self.get_temp_path("data.wav")
        data = get_wav_data(dtype, num_channels, normalize=False, num_frames=duration * sample_rate)
        save_wav(path, data, sample_rate)
        info = sox_io_backend.info(path)
        assert info.sample_rate == sample_rate
        assert info.num_frames == sample_rate * duration
        assert info.num_channels == num_channels
        assert info.bits_per_sample == sox_utils.get_bit_depth(dtype)
    def test_ulaw(self):
        """`sox_io_backend.info` can check ulaw file correctly"""
        duration = 1
        num_channels = 1
        sample_rate = 8000
        path = self.get_temp_path("data.wav")
        sox_utils.gen_audio_file(
            path, sample_rate=sample_rate, num_channels=num_channels, bit_depth=8, encoding="u-law", duration=duration
        )
        info = sox_io_backend.info(path)
        assert info.sample_rate == sample_rate
        assert info.num_frames == sample_rate * duration
        assert info.num_channels == num_channels
        assert info.bits_per_sample == 8
        assert info.encoding == "ULAW" 
    def test_alaw(self):
        """`sox_io_backend.info` can check alaw file correctly"""
        duration = 1
        num_channels = 1
        sample_rate = 8000
        path = self.get_temp_path("data.wav")
        sox_utils.gen_audio_file(
            path, sample_rate=sample_rate, num_channels=num_channels, bit_depth=8, encoding="a-law", duration=duration
        )
        info = sox_io_backend.info(path)
        assert info.sample_rate == sample_rate
-        assert info.num_frames == sample_size # duration*sample_rate
+        assert info.num_frames == sample_rate * duration
        assert info.num_channels == num_channels
-        assert info.bits_per_sample == get_bit_depth(dtype)
+        assert info.bits_per_sample == 8
-        assert info.encoding == get_encoding('wav', dtype)
+        assert info.encoding == "ALAW"
 #class TestInfoOpus(unittest.TestCase):
    #@parameterized.expand(
        #list(
            #itertools.product(
                #["96k"],
                #[1, 2],
                #[0, 5, 10],
            #)
        #),
    #)
    #def test_opus(self, bitrate, num_channels, compression_level):
        #"""`sox_io_backend.info` can check opus file correcty"""
        #path = data_utils.get_asset_path("io", f"{bitrate}_{compression_level}_{num_channels}ch.opus")
        #info = sox_io_backend.info(path)
        #assert info.sample_rate == 48000
        #assert info.num_frames == 32768
        #assert info.num_channels == num_channels
        #assert info.bits_per_sample == 0  # bit_per_sample is irrelevant for compressed formats
        #assert info.encoding == "OPUS"
 class FileObjTestBase(TempDirMixin):
    def _gen_file(self, ext, dtype, sample_rate, num_channels, num_frames, *, comments=None):
        path = self.get_temp_path(f"test.{ext}")
        bit_depth = sox_utils.get_bit_depth(dtype)
        duration = num_frames / sample_rate
        comment_file = self._gen_comment_file(comments) if comments else None
        sox_utils.gen_audio_file(
            path,
            sample_rate,
            num_channels=num_channels,
            encoding=sox_utils.get_encoding(dtype),
            bit_depth=bit_depth,
            duration=duration,
            comment_file=comment_file,
        )
        return path
    def _gen_comment_file(self, comments):
        comment_path = self.get_temp_path("comment.txt")
        with open(comment_path, "w") as file_:
            file_.writelines(comments)
        return comment_path
 class Unseekable:
    def __init__(self, fileobj):
        self.fileobj = fileobj
    def read(self, n):
        return self.fileobj.read(n)
 class TestFileObject(FileObjTestBase, unittest.TestCase):
    def _query_fileobj(self, ext, dtype, sample_rate, num_channels, num_frames, *, comments=None):
        path = self._gen_file(ext, dtype, sample_rate, num_channels, num_frames, comments=comments)
        format_ = ext if ext in ["mp3"] else None
        with open(path, "rb") as fileobj:
            return sox_io_backend.info(fileobj, format_)
    def _query_bytesio(self, ext, dtype, sample_rate, num_channels, num_frames):
        path = self._gen_file(ext, dtype, sample_rate, num_channels, num_frames)
        format_ = ext if ext in ["mp3"] else None
        with open(path, "rb") as file_:
            fileobj = io.BytesIO(file_.read())
        return sox_io_backend.info(fileobj, format_)
    def _query_tarfile(self, ext, dtype, sample_rate, num_channels, num_frames):
        audio_path = self._gen_file(ext, dtype, sample_rate, num_channels, num_frames)
        audio_file = os.path.basename(audio_path)
        archive_path = self.get_temp_path("archive.tar.gz")
        with tarfile.TarFile(archive_path, "w") as tarobj:
            tarobj.add(audio_path, arcname=audio_file)
        format_ = ext if ext in ["mp3"] else None
        with tarfile.TarFile(archive_path, "r") as tarobj:
            fileobj = tarobj.extractfile(audio_file)
            return sox_io_backend.info(fileobj, format_)
    @contextmanager
    def _set_buffer_size(self, buffer_size):
        try:
            original_buffer_size = get_buffer_size()
            set_buffer_size(buffer_size)
            yield
        finally:
            set_buffer_size(original_buffer_size)
    @parameterized.expand(
        [
            ("wav", "float32"),
            ("wav", "int32"),
            ("wav", "int16"),
            ("wav", "uint8"),
        ]
    )
    def test_fileobj(self, ext, dtype):
        """Querying audio via file object works"""
        sample_rate = 16000
        num_frames = 3 * sample_rate
        num_channels = 2
        sinfo = self._query_fileobj(ext, dtype, sample_rate, num_channels, num_frames)
        bits_per_sample = get_bits_per_sample(ext, dtype)
        num_frames = 0 if ext in ["mp3", "vorbis"] else num_frames
        assert sinfo.sample_rate == sample_rate
        assert sinfo.num_channels == num_channels
        assert sinfo.num_frames == num_frames
        assert sinfo.bits_per_sample == bits_per_sample
        assert sinfo.encoding == get_encoding(ext, dtype)
    @parameterized.expand(
        [
            ("wav", "float32"),
            ("wav", "int32"),
            ("wav", "int16"),
            ("wav", "uint8"),
        ]
    )
    def test_bytesio(self, ext, dtype):
        """Querying audio via ByteIO object works for small data"""
        sample_rate = 16000
        num_frames = 3 * sample_rate
        num_channels = 2
        sinfo = self._query_bytesio(ext, dtype, sample_rate, num_channels, num_frames)
        bits_per_sample = get_bits_per_sample(ext, dtype)
        num_frames = 0 if ext in ["mp3", "vorbis"] else num_frames
        assert sinfo.sample_rate == sample_rate
        assert sinfo.num_channels == num_channels
        assert sinfo.num_frames == num_frames
        assert sinfo.bits_per_sample == bits_per_sample
        assert sinfo.encoding == get_encoding(ext, dtype)
    @parameterized.expand(
        [
            ("wav", "float32"),
            ("wav", "int32"),
            ("wav", "int16"),
            ("wav", "uint8"),
        ]
    )
    def test_bytesio_tiny(self, ext, dtype):
        """Querying audio via ByteIO object works for small data"""
        sample_rate = 8000
        num_frames = 4
        num_channels = 2
        sinfo = self._query_bytesio(ext, dtype, sample_rate, num_channels, num_frames)
        bits_per_sample = get_bits_per_sample(ext, dtype)
        num_frames = 0 if ext in ["mp3", "vorbis"] else num_frames
        assert sinfo.sample_rate == sample_rate
        assert sinfo.num_channels == num_channels
        assert sinfo.num_frames == num_frames
        assert sinfo.bits_per_sample == bits_per_sample
        assert sinfo.encoding == get_encoding(ext, dtype)
    @parameterized.expand(
        [
            ("wav", "float32"),
            ("wav", "int32"),
            ("wav", "int16"),
            ("wav", "uint8"),
            ("flac", "float32"),
            ("vorbis", "float32"),
            ("amb", "int16"),
        ]
    )
    def test_tarfile(self, ext, dtype):
        """Querying compressed audio via file-like object works"""
        sample_rate = 16000
        num_frames = 3.0 * sample_rate
        num_channels = 2
        sinfo = self._query_tarfile(ext, dtype, sample_rate, num_channels, num_frames)
        bits_per_sample = get_bits_per_sample(ext, dtype)
        num_frames = 0 if ext in ["vorbis"] else num_frames
        assert sinfo.sample_rate == sample_rate
        assert sinfo.num_channels == num_channels
        assert sinfo.num_frames == num_frames
        assert sinfo.bits_per_sample == bits_per_sample
        assert sinfo.encoding == get_encoding(ext, dtype)
 if __name__ == '__main__':
    unittest.main()
--- a/tests/unit/audio/backends/sox_io/testdata
+++ b/tests/unit/audio/backends/sox_io/testdata
@ -1 +0,0 @@
 ../../features/testdata