make loading _paddleaudio.so work

pull/2164/head
Yang Zhou 3 years ago
parent 3bb904bda9
commit cffe555c91

@ -11,6 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from . import _extension
from . import compliance from . import compliance
from . import datasets from . import datasets
from . import features from . import features
@ -18,7 +20,6 @@ from . import functional
from . import io from . import io
from . import metric from . import metric
from . import utils from . import utils
from ._ops import ops
from paddlespeech.audio.backends import get_audio_backend from paddlespeech.audio.backends import get_audio_backend
from paddlespeech.audio.backends import list_audio_backends from paddlespeech.audio.backends import list_audio_backends
from paddlespeech.audio.backends import set_audio_backend from paddlespeech.audio.backends import set_audio_backend
@ -30,7 +31,6 @@ __all__ = [
"functional", "functional",
"features", "features",
"utils", "utils",
'ops'
"list_audio_backends", "list_audio_backends",
"get_audio_backend", "get_audio_backend",
"set_audio_backend", "set_audio_backend",

@ -4,8 +4,69 @@ from pathlib import Path
from ._internal import module_utils as _mod_utils # noqa: F401 from ._internal import module_utils as _mod_utils # noqa: F401
_LIB_DIR = Path(__file__) / "lib"
import contextlib
import ctypes
import os
import sys
import types
# Query `hasattr` only once: the dlopen-flag API is not present on every
# platform, so remember whether it can be used at all.
_SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(
    sys, 'setdlopenflags')


@contextlib.contextmanager
def dl_open_guard():
    """Temporarily add ``RTLD_GLOBAL`` to the interpreter's dlopen flags.

    Context manager used while opening a shared library that provides
    custom operators. On entry the current flags are saved and
    ``ctypes.RTLD_GLOBAL`` is OR-ed in; on exit the saved flags are put
    back. When ``sys`` lacks ``getdlopenflags``/``setdlopenflags`` the
    manager does nothing.

    See https://manpages.debian.org/bullseye/manpages-dev/dlopen.3.en.html

    The flags are restored even if the managed block raises (for example
    when the library fails to load), so a failed load cannot leave
    ``RTLD_GLOBAL`` enabled for the rest of the process.
    """
    if not _SET_GLOBAL_FLAGS:
        # No dlopen-flag API available: nothing to set, nothing to restore.
        yield
        return

    old_flags = sys.getdlopenflags()
    sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL)
    try:
        yield
    finally:
        # Always undo the change, including on the exception path.
        sys.setdlopenflags(old_flags)
def resolve_library_path(path: str) -> str:
    """Return the canonical form of ``path`` as computed by ``os.path.realpath``."""
    canonical = os.path.realpath(path)
    return canonical
class _Ops(types.ModuleType):
    """Module-like object that loads shared libraries and records their paths.

    The instance is created as a module named ``paddlespeech.ops`` and keeps
    the resolved path of every library loaded through :meth:`load_library`
    in ``loaded_libraries``.
    """

    def __init__(self):
        super().__init__('paddlespeech.ops')
        # Resolved paths of all libraries loaded through ``load_library``.
        self.loaded_libraries = set()

    def load_library(self, path):
        """Load the shared library at ``path`` into the current process.

        This is how custom operators are loaded dynamically: compile the
        operator together with its static registration code into a shared
        object, then call
        ``paddlespeech.ops.load_library('path/to/libcustom.so')``.

        After a successful load the resolved path is added to the
        ``paddlespeech.ops.loaded_libraries`` set, which can be inspected
        for the paths of all libraries loaded using this function.

        Args:
            path (str): A path to a shared library to load.
        """
        resolved = resolve_library_path(path)
        # https://docs.python.org/3/library/ctypes.html?highlight=ctypes#loading-shared-libraries
        # Opening the library runs its static (global) initialization code,
        # which is what registers the custom operators.
        with dl_open_guard():
            ctypes.CDLL(resolved)
        self.loaded_libraries.add(resolved)
_LIB_DIR = Path(__file__).parent / "lib"
def _get_lib_path(lib: str): def _get_lib_path(lib: str):
suffix = "pyd" if os.name == "nt" else "so" suffix = "pyd" if os.name == "nt" else "so"
@ -42,9 +103,12 @@ def _load_lib(lib: str) -> bool:
If a dependency is missing, then users have to install it. If a dependency is missing, then users have to install it.
""" """
path = _get_lib_path(lib) path = _get_lib_path(lib)
warnings.warn("lib path is :" + str(path))
if not path.exists(): if not path.exists():
warnings.warn("lib path is not exists:" + str(path))
return False return False
paddlespeech.audio.ops.load_library(path) #paddlespeech.audio.ops.load_library(path)
ops.load_library(path)
return True return True
@ -56,7 +120,7 @@ def _init_ffmpeg():
if _FFMPEG_INITIALIZED: if _FFMPEG_INITIALIZED:
return return
if not paddlespeech.audio.ops.paddlleaudio.is_ffmpeg_available(): if not paddlespeech.audio.paddlleaudio.is_ffmpeg_available():
raise RuntimeError( raise RuntimeError(
"paddlleaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling paddlleaudio." "paddlleaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling paddlleaudio."
) )
@ -69,15 +133,15 @@ def _init_ffmpeg():
import paddllespeech.audio._paddlleaudio_ffmpeg # noqa import paddllespeech.audio._paddlleaudio_ffmpeg # noqa
paddlespeech.audio.ops.paddlleaudio.ffmpeg_init() paddlespeech.audio.paddlleaudio.ffmpeg_init()
if paddlespeech.audio.ops.paddlleaudio.ffmpeg_get_log_level() > 8: if paddlespeech.audio.paddlleaudio.ffmpeg_get_log_level() > 8:
paddlespeech.audio.ops.paddlleaudio.ffmpeg_set_log_level(8) paddlespeech.audio.paddlleaudio.ffmpeg_set_log_level(8)
_FFMPEG_INITIALIZED = True _FFMPEG_INITIALIZED = True
def _init_extension(): def _init_extension():
if not _mod_utils.is_module_available("paddlespeech._paddleaudio"): if not _mod_utils.is_module_available("paddlespeech.audio._paddleaudio"):
warnings.warn("paddlespeech C++ extension is not available.") warnings.warn("paddlespeech C++ extension is not available.")
return return
@ -96,4 +160,6 @@ def _init_extension():
pass pass
ops = _Ops()
_init_extension() _init_extension()

@ -0,0 +1,55 @@
# code from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py
class AudioMetaData:
    """Plain container for the metadata of an audio stream.

    Adapted from torchaudio's ``AudioMetaData``.

    :ivar int sample_rate: Sample rate
    :ivar int num_frames: The number of frames
    :ivar int num_channels: The number of channels
    :ivar int bits_per_sample: The number of bits per sample. This is 0 for
        lossy formats, or when it cannot be accurately inferred.
    :ivar str encoding: Audio encoding.
        The values encoding can take are one of the following:

            * ``PCM_S``: Signed integer linear PCM
            * ``PCM_U``: Unsigned integer linear PCM
            * ``PCM_F``: Floating point linear PCM
            * ``FLAC``: Flac, Free Lossless Audio Codec
            * ``ULAW``: Mu-law
            * ``ALAW``: A-law
            * ``MP3`` : MP3, MPEG-1 Audio Layer III
            * ``VORBIS``: OGG Vorbis
            * ``AMR_WB``: Adaptive Multi-Rate Wideband
            * ``AMR_NB``: Adaptive Multi-Rate (narrowband)
            * ``OPUS``: Opus
            * ``HTK``: Single channel 16-bit PCM
            * ``UNKNOWN`` : None of above
    """

    def __init__(
            self,
            sample_rate: int,
            num_frames: int,
            num_channels: int,
            bits_per_sample: int,
            encoding: str, ):
        self.sample_rate = sample_rate
        self.num_frames = num_frames
        self.num_channels = num_channels
        self.bits_per_sample = bits_per_sample
        self.encoding = encoding

    def __str__(self):
        # Render the fields in declaration order as ``name=value`` pairs.
        field_names = (
            "sample_rate",
            "num_frames",
            "num_channels",
            "bits_per_sample",
            "encoding", )
        rendered = ", ".join("{}={}".format(name, getattr(self, name))
                             for name in field_names)
        return "AudioMetaData(" + rendered + ")"

@ -6,9 +6,42 @@ from typing import Tuple
from typing import Union from typing import Union
from paddle import Tensor from paddle import Tensor
from .common import AudioMetaData
from paddlespeech.audio._internal import module_utils as _mod_utils
from paddlespeech.audio._paddleaudio import get_info_file
from paddlespeech.audio._paddleaudio import get_info_fileobj
#https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py #https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:
    """Always raise ``RuntimeError`` naming ``filepath``; ``format`` is unused."""
    message = "Failed to fetch metadata from {}".format(filepath)
    raise RuntimeError(message)
def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioMetaData:
    """Always raise ``RuntimeError`` naming ``fileobj``; ``format`` is unused."""
    message = "Failed to fetch metadata from {}".format(fileobj)
    raise RuntimeError(message)
# Note: need to comply TorchScript syntax -- need annotation and no f-string
def _fail_load(
        filepath: str,
        frame_offset: int=0,
        num_frames: int=-1,
        normalize: bool=True,
        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[Tensor, int]:
    """Always raise ``RuntimeError`` reporting that ``filepath`` could not be loaded.

    All parameters other than ``filepath`` are accepted but not used.

    The return annotation uses ``Tensor`` (imported via
    ``from paddle import Tensor``) rather than ``paddle.Tensor``, matching
    the other signatures in this module and avoiding a dependency on the
    bare ``paddle`` name being bound when the function is defined.

    Raises:
        RuntimeError: always.
    """
    raise RuntimeError("Failed to load audio from {}".format(filepath))
def _fail_load_fileobj(fileobj, *args, **kwargs):
    """Always raise ``RuntimeError`` naming ``fileobj``; extra arguments are ignored."""
    message = "Failed to load audio from {}".format(fileobj)
    raise RuntimeError(message)
# Fallback handlers: each alias points at a function that unconditionally
# raises RuntimeError.
_fallback_info = _fail_info
_fallback_info_fileobj = _fail_info_fileobj
_fallback_load = _fail_load
# NOTE(review): "filebj" looks like a typo for "fileobj" -- confirm that no
# caller depends on this spelling before renaming.
_fallback_load_filebj = _fail_load_fileobj
def load( def load(
filepath: Union[str, Path], filepath: Union[str, Path],
out: Optional[Tensor] = None, out: Optional[Tensor] = None,
@ -20,10 +53,12 @@ def load(
) -> Tuple[Tensor, int]: ) -> Tuple[Tensor, int]:
raise RuntimeError("No audio I/O backend is available.") raise RuntimeError("No audio I/O backend is available.")
def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None: def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
raise RuntimeError("No audio I/O backend is available.") raise RuntimeError("No audio I/O backend is available.")
@_mod_utils.requires_sox()
def info(filepath: str) -> None: def info(filepath: str, format: Optional[str]) -> None:
raise RuntimeError("No audio I/O backend is available.") sinfo = paddleaudio._paddleaudio.get_info_file(filepath, format)
if sinfo is not None:
return AudioMetaData(*sinfo)
return _fallback_info(filepath, format)

@ -11,5 +11,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from . import fbank from .kaldi import fbank
from . import pitch from .kaldi import pitch

@ -13,12 +13,12 @@
# limitations under the License. # limitations under the License.
from paddlespeech.audio._internal import module_utils from paddlespeech.audio._internal import module_utils
import paddlespeech.audio.ops.paddleaudio.ComputeFbank as ComputeFbank from paddlespeech.audio._paddleaudio import ComputeFbank as ComputeFbank
import paddlespeech.audio.ops.paddleaudio.PitchExtractionOptions as PitchExtractionOptions from paddlespeech.audio._paddleaudio import PitchExtractionOptions as PitchExtractionOptions
import paddlespeech.audio.ops.paddleaudio.FrameExtractionOptions as FrameExtractionOptions from paddlespeech.audio._paddleaudio import FrameExtractionOptions as FrameExtractionOptions
import paddlespeech.audio.ops.paddleaudio.MelBanksOptions as MelBanksOptions from paddlespeech.audio._paddleaudio import MelBanksOptions as MelBanksOptions
import paddlespeech.audio.ops.paddleaudio.FbankOptions as FbankOptions from paddlespeech.audio._paddleaudio import FbankOptions as FbankOptions
import paddlespeech.audio.ops.paddleaudio.ComputeKaldiPitch as ComputeKaldiPitch from paddlespeech.audio._paddleaudio import ComputeKaldiPitch as ComputeKaldiPitch
__all__ = [ __all__ = [
'fbank', 'fbank',

@ -16,8 +16,8 @@ import unittest
import numpy as np import numpy as np
import paddle import paddle
import paddlespeech.audio.kaldi.fbank as fbank from paddlespeech.audio.kaldi import fbank as fbank
import paddlespeech.audio.kaldi.pitch as pitch from paddlespeech.audio.kaldi import pitch as pitch
from kaldiio import ReadHelper from kaldiio import ReadHelper
# the groundtruth feats computed in kaldi command below. # the groundtruth feats computed in kaldi command below.

Loading…
Cancel
Save