You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
PaddleSpeech/audio/paddleaudio/backends/sox_io_backend.py

107 lines
3.6 KiB

import os
from typing import Optional
from typing import Tuple
import paddle
import paddleaudio
from paddle import Tensor
from paddleaudio._internal import module_utils as _mod_utils
from .common import AudioInfo
#https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
def _fail_info(filepath: str, format: Optional[str]) -> AudioInfo:
raise RuntimeError("Failed to fetch metadata from {}".format(filepath))
def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioInfo:
raise RuntimeError("Failed to fetch metadata from {}".format(fileobj))
# Note: need to comply TorchScript syntax -- need annotation and no f-string
def _fail_load(
filepath: str,
frame_offset: int=0,
num_frames: int=-1,
normalize: bool=True,
channels_first: bool=True,
format: Optional[str]=None, ) -> Tuple[Tensor, int]:
raise RuntimeError("Failed to load audio from {}".format(filepath))
def _fail_load_fileobj(fileobj, *args, **kwargs):
raise RuntimeError(f"Failed to load audio from {fileobj}")
_fallback_info = _fail_info
_fallback_info_fileobj = _fail_info_fileobj
_fallback_load = _fail_load
_fallback_load_filebj = _fail_load_fileobj
@_mod_utils.requires_sox()
def load(
filepath: str,
frame_offset: int=0,
num_frames: int=-1,
normalize: bool=True,
channels_first: bool=True,
format: Optional[str]=None, ) -> Tuple[Tensor, int]:
if hasattr(filepath, "read"):
ret = paddleaudio._paddleaudio.load_audio_fileobj(
filepath, frame_offset, num_frames, normalize, channels_first,
format)
if ret is not None:
audio_tensor = paddle.to_tensor(ret[0])
return (audio_tensor, ret[1])
return _fallback_load_fileobj(filepath, frame_offset, num_frames,
normalize, channels_first, format)
filepath = os.fspath(filepath)
ret = paddleaudio._paddleaudio.sox_io_load_audio_file(
filepath, frame_offset, num_frames, normalize, channels_first, format)
if ret is not None:
audio_tensor = paddle.to_tensor(ret[0])
return (audio_tensor, ret[1])
return _fallback_load(filepath, frame_offset, num_frames, normalize,
channels_first, format)
@_mod_utils.requires_sox()
def save(
filepath: str,
src: Tensor,
sample_rate: int,
channels_first: bool=True,
compression: Optional[float]=None,
format: Optional[str]=None,
encoding: Optional[str]=None,
bits_per_sample: Optional[int]=None, ):
src_arr = src.numpy()
if hasattr(filepath, "write"):
paddleaudio._paddleaudio.save_audio_fileobj(
filepath, src_arr, sample_rate, channels_first, compression, format,
encoding, bits_per_sample)
return
filepath = os.fspath(filepath)
paddleaudio._paddleaudio.sox_io_save_audio_file(
filepath, src_arr, sample_rate, channels_first, compression, format,
encoding, bits_per_sample)
@_mod_utils.requires_sox()
def info(
filepath: str,
format: Optional[str]=None, ) -> AudioInfo:
if hasattr(filepath, "read"):
sinfo = paddleaudio._paddleaudio.get_info_fileobj(filepath, format)
if sinfo is not None:
return AudioInfo(*sinfo)
return _fallback_info_fileobj(filepath, format)
filepath = os.fspath(filepath)
sinfo = paddleaudio._paddleaudio.get_info_file(filepath, format)
if sinfo is not None:
return AudioInfo(*sinfo)
return _fallback_info(filepath, format)