From 7abfad804ec79f811f78a9a0ce46db18394782be Mon Sep 17 00:00:00 2001 From: YangZhou Date: Mon, 15 Aug 2022 16:06:17 +0800 Subject: [PATCH] fix typo --- paddlespeech/audio/sox_effects/sox_effects.py | 48 ------------------- paddlespeech/audio/src/pybind/sox/io.cpp | 4 ++ paddlespeech/audio/src/pybind/sox/types.h | 2 +- tests/unit/audio/backends/sox_io/save_test.py | 6 +-- tests/unit/common_utils/case_utils.py | 6 ++- 5 files changed, 12 insertions(+), 54 deletions(-) diff --git a/paddlespeech/audio/sox_effects/sox_effects.py b/paddlespeech/audio/sox_effects/sox_effects.py index a984d2925..e9b839c1a 100644 --- a/paddlespeech/audio/sox_effects/sox_effects.py +++ b/paddlespeech/audio/sox_effects/sox_effects.py @@ -118,39 +118,6 @@ def apply_effects_tensor( >>> sample_rate 8000 - Example - Torchscript-able transform - >>> - >>> # Use `apply_effects_tensor` in `paddle.nn.Module` and dump it to file, - >>> # then run sox effect via Torchscript runtime. - >>> - >>> class SoxEffectTransform(paddle.nn.Module): - ... effects: List[List[str]] - ... - ... def __init__(self, effects: List[List[str]]): - ... super().__init__() - ... self.effects = effects - ... - ... def forward(self, tensor: paddle.Tensor, sample_rate: int): - ... return sox_effects.apply_effects_tensor( - ... tensor, sample_rate, self.effects) - ... - ... - >>> # Create transform object - >>> effects = [ - ... ["lowpass", "-1", "300"], # apply single-pole lowpass filter - ... ["rate", "8000"], # change sample rate to 8000 - ... ] - >>> transform = SoxEffectTensorTransform(effects, input_sample_rate) - >>> - >>> # Dump it to file and load - >>> path = 'sox_effect.zip' - >>> paddle.jit.script(trans).save(path) - >>> transform = paddle.jit.load(path) - >>> - >>>> # Run transform - >>> waveform, input_sample_rate = paddleaudio.load("input.wav") - >>> waveform, sample_rate = transform(waveform, input_sample_rate) - >>> assert sample_rate == 8000 """ tensor_np = tensor.numpy() ret = paddleaudio.sox_effects_apply_effects_tensor(tensor_np, sample_rate, effects, channels_first) @@ -169,10 +136,6 @@ def apply_effects_file( ) -> Tuple[paddle.Tensor, int]: """Apply sox effects to the audio file and load the resulting data as Tensor - .. devices:: CPU - - .. properties:: TorchScript - Note: This function works in the way very similar to ``sox`` command, however there are slight differences. For example, ``sox`` commnad adds certain effects automatically (such as @@ -183,17 +146,6 @@ def apply_effects_file( Args: path (path-like object or file-like object): - Source of audio data. When the function is not compiled by TorchScript, - (e.g. ``paddle.jit.script``), the following types are accepted: - - * ``path-like``: file path - * ``file-like``: Object with ``read(size: int) -> bytes`` method, - which returns byte string of at most ``size`` length. - - When the function is compiled by TorchScript, only ``str`` type is allowed. - - Note: This argument is intentionally annotated as ``str`` only for - TorchScript compiler compatibility. effects (List[List[str]]): List of effects. normalize (bool, optional): When ``True``, this function always return ``float32``, and sample values are diff --git a/paddlespeech/audio/src/pybind/sox/io.cpp b/paddlespeech/audio/src/pybind/sox/io.cpp index 4c27e6aab..78b8af991 100644 --- a/paddlespeech/audio/src/pybind/sox/io.cpp +++ b/paddlespeech/audio/src/pybind/sox/io.cpp @@ -136,12 +136,16 @@ void save_audio_file(const std::string& path, const auto num_channels = tensor.shape(channels_first ? 0 : 1); //TORCH_CHECK(num_channels == 1, // "amr-nb format only supports single channel audio."); + assert(num_channels == 1); } else if (filetype == "htk") { const auto num_channels = tensor.shape(channels_first ? 0 : 1); // TORCH_CHECK(num_channels == 1, // "htk format only supports single channel audio."); + assert(num_channels == 1); } else if (filetype == "gsm") { const auto num_channels = tensor.shape(channels_first ? 0 : 1); + assert(num_channels == 1); + assert(sample_rate == 8000); //TORCH_CHECK(num_channels == 1, // "gsm format only supports single channel audio."); //TORCH_CHECK(sample_rate == 8000, diff --git a/paddlespeech/audio/src/pybind/sox/types.h b/paddlespeech/audio/src/pybind/sox/types.h index 824c0f632..780840161 100644 --- a/paddlespeech/audio/src/pybind/sox/types.h +++ b/paddlespeech/audio/src/pybind/sox/types.h @@ -55,4 +55,4 @@ BitDepth get_bit_depth_from_option(const tl::optional bit_depth); std::string get_encoding(sox_encoding_t encoding); } // namespace sox_utils -} // namespace torchaudio \ No newline at end of file +} // namespace paddleaudio \ No newline at end of file diff --git a/tests/unit/audio/backends/sox_io/save_test.py b/tests/unit/audio/backends/sox_io/save_test.py index b07af70f2..7942f018d 100644 --- a/tests/unit/audio/backends/sox_io/save_test.py +++ b/tests/unit/audio/backends/sox_io/save_test.py @@ -64,7 +64,7 @@ class TestSaveBase(TempDirMixin): | | | 2.1. load with scipy | 3.1. Convert to the target | then save it into the target | format depth with sox - | format with torchaudio | + | format with paddleaudio | v v target format target format | | @@ -83,7 +83,7 @@ class TestSaveBase(TempDirMixin): cmp_bit_depth = 32 src_path = self.get_temp_path("1.source.wav") - tgt_path = self.get_temp_path(f"2.1.torchaudio.{format}") + tgt_path = self.get_temp_path(f"2.1.paddleaudio.{format}") tst_path = self.get_temp_path("2.2.result.wav") sox_path = self.get_temp_path(f"3.1.sox.{format}") ref_path = self.get_temp_path("3.2.ref.wav") @@ -92,7 +92,7 @@ class TestSaveBase(TempDirMixin): data = get_wav_data(src_dtype, num_channels, normalize=False, num_frames=num_frames) save_wav(src_path, data, sample_rate) - # 2.1. Convert the original wav to target format with torchaudio + # 2.1. Convert the original wav to target format with paddleaudio data = load_wav(src_path, normalize=False)[0] if test_mode == "path": sox_io_backend.save( diff --git a/tests/unit/common_utils/case_utils.py b/tests/unit/common_utils/case_utils.py index 6f4326f56..406d293b6 100644 --- a/tests/unit/common_utils/case_utils.py +++ b/tests/unit/common_utils/case_utils.py @@ -7,6 +7,8 @@ import tempfile import time import unittest +#code is from:https://github.com/pytorch/audio/blob/main/test/torchaudio_unittest/common_utils/case_utils.py + import paddle from paddlespeech.audio._internal.module_utils import ( is_kaldi_available, @@ -24,9 +26,9 @@ class TempDirMixin: @classmethod def get_base_temp_dir(cls): - # If TORCHAUDIO_TEST_TEMP_DIR is set, use it instead of temporary directory. + # If PADDLEAUDIO_TEST_TEMP_DIR is set, use it instead of temporary directory. # this is handy for debugging. - key = "TORCHAUDIO_TEST_TEMP_DIR" + key = "PADDLEAUDIO_TEST_TEMP_DIR" if key in os.environ: return os.environ[key] if cls.temp_dir_ is None: