make loading _paddleaudio.so work

pull/2164/head
Yang Zhou 3 years ago
parent 3bb904bda9
commit cffe555c91

@ -11,6 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import _extension
from . import compliance
from . import datasets
from . import features
@ -18,7 +20,6 @@ from . import functional
from . import io
from . import metric
from . import utils
from ._ops import ops
from paddlespeech.audio.backends import get_audio_backend
from paddlespeech.audio.backends import list_audio_backends
from paddlespeech.audio.backends import set_audio_backend
@ -30,7 +31,6 @@ __all__ = [
"functional",
"features",
"utils",
'ops'
"list_audio_backends",
"get_audio_backend",
"set_audio_backend",

@ -4,8 +4,69 @@ from pathlib import Path
from ._internal import module_utils as _mod_utils # noqa: F401
_LIB_DIR = Path(__file__) / "lib"
import contextlib
import ctypes
import os
import sys
import types
# Query `hasattr` only once. The dlopen-flag accessors are not available on
# every platform, so the guard below degrades to a no-op when they are missing.
_SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(
    sys, 'setdlopenflags')


@contextlib.contextmanager
def dl_open_guard():
    """Temporarily add ``RTLD_GLOBAL`` to the interpreter's dlopen flags.

    Context manager used while opening a shared library that provides custom
    operators. See
    https://manpages.debian.org/bullseye/manpages-dev/dlopen.3.en.html
    for the meaning of the flag.

    The previous flags are restored when the ``with`` body finishes, including
    when it raises; without that, a failed library load would leave
    ``RTLD_GLOBAL`` switched on for every later extension import.

    When ``sys.getdlopenflags`` / ``sys.setdlopenflags`` are unavailable the
    context manager does nothing.
    """
    if not _SET_GLOBAL_FLAGS:
        yield
        return

    old_flags = sys.getdlopenflags()
    sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL)
    try:
        yield
    finally:
        # Always put the original flags back, even if the body raised.
        sys.setdlopenflags(old_flags)
def resolve_library_path(path: str) -> str:
    """Return the canonical form of ``path`` as given by ``os.path.realpath``.

    Args:
        path: Location of a library file; may be relative or contain
            symbolic links.

    Returns:
        The path after ``os.path.realpath`` has been applied to it.
    """
    canonical = os.path.realpath(path)
    return canonical
class _Ops(types.ModuleType):
    """Module-like object that loads shared libraries and records them.

    The instance reports ``'paddlespeech.ops'`` as its module name.
    """

    def __init__(self):
        super().__init__('paddlespeech.ops')
        # Resolved paths of every library loaded through `load_library`.
        self.loaded_libraries = set()

    def load_library(self, path):
        """Load the shared library at ``path`` into the current process.

        This is the entry point for dynamically loading custom operators:
        compile the operator together with its static registration code into
        a shared object, then call
        ``paddlespeech.ops.load_library('path/to/libcustom.so')``.

        Once loaded, the resolved path is added to the
        ``paddlespeech.ops.loaded_libraries`` set, which can be inspected to
        see every library loaded through this method.

        Args:
            path (str): A path to a shared library to load.
        """
        real_path = resolve_library_path(path)
        with dl_open_guard():
            # https://docs.python.org/3/library/ctypes.html?highlight=ctypes#loading-shared-libraries
            # Opening the library runs its static (global) initialization
            # code, which is where custom operators register themselves.
            ctypes.CDLL(real_path)
        self.loaded_libraries.add(real_path)
_LIB_DIR = Path(__file__).parent / "lib"
def _get_lib_path(lib: str):
suffix = "pyd" if os.name == "nt" else "so"
@ -42,9 +103,12 @@ def _load_lib(lib: str) -> bool:
If a dependency is missing, then users have to install it.
"""
path = _get_lib_path(lib)
warnings.warn("lib path is :" + str(path))
if not path.exists():
warnings.warn("lib path is not exists:" + str(path))
return False
paddlespeech.audio.ops.load_library(path)
#paddlespeech.audio.ops.load_library(path)
ops.load_library(path)
return True
@ -56,7 +120,7 @@ def _init_ffmpeg():
if _FFMPEG_INITIALIZED:
return
if not paddlespeech.audio.ops.paddlleaudio.is_ffmpeg_available():
if not paddlespeech.audio.paddlleaudio.is_ffmpeg_available():
raise RuntimeError(
"paddlleaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling paddlleaudio."
)
@ -69,15 +133,15 @@ def _init_ffmpeg():
import paddllespeech.audio._paddlleaudio_ffmpeg # noqa
paddlespeech.audio.ops.paddlleaudio.ffmpeg_init()
if paddlespeech.audio.ops.paddlleaudio.ffmpeg_get_log_level() > 8:
paddlespeech.audio.ops.paddlleaudio.ffmpeg_set_log_level(8)
paddlespeech.audio.paddlleaudio.ffmpeg_init()
if paddlespeech.audio.paddlleaudio.ffmpeg_get_log_level() > 8:
paddlespeech.audio.paddlleaudio.ffmpeg_set_log_level(8)
_FFMPEG_INITIALIZED = True
def _init_extension():
if not _mod_utils.is_module_available("paddlespeech._paddleaudio"):
if not _mod_utils.is_module_available("paddlespeech.audio._paddleaudio"):
warnings.warn("paddlespeech C++ extension is not available.")
return
@ -96,4 +160,6 @@ def _init_extension():
pass
ops = _Ops()
_init_extension()

@ -0,0 +1,55 @@
# code from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py
class AudioMetaData:
    """Plain container for the metadata of an audio stream.

    Instances are built by the backend ``info`` functions and simply store
    the five values passed to the constructor.

    :ivar int sample_rate: Sample rate
    :ivar int num_frames: The number of frames
    :ivar int num_channels: The number of channels
    :ivar int bits_per_sample: The number of bits per sample. This is 0 for
        lossy formats, or when it cannot be accurately inferred.
    :ivar str encoding: Audio encoding
        The values encoding can take are one of the following:

            * ``PCM_S``: Signed integer linear PCM
            * ``PCM_U``: Unsigned integer linear PCM
            * ``PCM_F``: Floating point linear PCM
            * ``FLAC``: Flac, Free Lossless Audio Codec
            * ``ULAW``: Mu-law
            * ``ALAW``: A-law
            * ``MP3`` : MP3, MPEG-1 Audio Layer III
            * ``VORBIS``: OGG Vorbis
            * ``AMR_WB``: Adaptive Multi-Rate Wideband
            * ``AMR_NB``: Adaptive Multi-Rate (narrowband)
            * ``OPUS``: Opus
            * ``HTK``: Single channel 16-bit PCM
            * ``UNKNOWN`` : None of above
    """

    def __init__(
            self,
            sample_rate: int,
            num_frames: int,
            num_channels: int,
            bits_per_sample: int,
            encoding: str, ):
        self.sample_rate = sample_rate
        self.num_frames = num_frames
        self.num_channels = num_channels
        self.bits_per_sample = bits_per_sample
        self.encoding = encoding

    def __str__(self):
        # Render the fields in constructor order as ``name=value`` pairs.
        field_names = (
            "sample_rate",
            "num_frames",
            "num_channels",
            "bits_per_sample",
            "encoding", )
        rendered = ", ".join(
            f"{name}={getattr(self, name)}" for name in field_names)
        return f"AudioMetaData({rendered})"

@ -6,9 +6,42 @@ from typing import Tuple
from typing import Union
from paddle import Tensor
from .common import AudioMetaData
from paddlespeech.audio._internal import module_utils as _mod_utils
from paddlespeech.audio._paddleaudio import get_info_file
from paddlespeech.audio._paddleaudio import get_info_fileobj
#https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:
    """Fallback for path-based metadata queries; never returns.

    Args:
        filepath: Path that was being inspected; included in the message.
        format: Unused; present so the signature matches the real query.

    Raises:
        RuntimeError: Always.
    """
    message = "Failed to fetch metadata from {}".format(filepath)
    raise RuntimeError(message)
def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioMetaData:
    """Fallback for file-object metadata queries; never returns.

    Args:
        fileobj: Object that was being inspected; formatted into the message.
        format: Unused; present so the signature matches the real query.

    Raises:
        RuntimeError: Always.
    """
    message = "Failed to fetch metadata from {}".format(fileobj)
    raise RuntimeError(message)
# Note: need to comply TorchScript syntax -- need annotation and no f-string
def _fail_load(
        filepath: str,
        frame_offset: int=0,
        num_frames: int=-1,
        normalize: bool=True,
        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[paddle.Tensor, int]:
    """Fallback for path-based loading; never returns.

    Only ``filepath`` is used (for the error message); the remaining
    parameters exist so the signature mirrors a real loader.

    Raises:
        RuntimeError: Always.
    """
    message = "Failed to load audio from {}".format(filepath)
    raise RuntimeError(message)
def _fail_load_fileobj(fileobj, *args, **kwargs):
    """Fallback for file-object loading; never returns.

    Args:
        fileobj: Object that was being read; formatted into the message.
        *args: Ignored.
        **kwargs: Ignored.

    Raises:
        RuntimeError: Always.
    """
    message = f"Failed to load audio from {fileobj}"
    raise RuntimeError(message)
# Module-level aliases bound to the always-raising `_fail_*` handlers.
# NOTE(review): `_fallback_load_filebj` looks like a misspelling of
# "fileobj" -- confirm no caller depends on the current name before renaming.
_fallback_info = _fail_info
_fallback_info_fileobj = _fail_info_fileobj
_fallback_load = _fail_load
_fallback_load_filebj = _fail_load_fileobj
def load(
filepath: Union[str, Path],
out: Optional[Tensor] = None,
@ -20,10 +53,12 @@ def load(
) -> Tuple[Tensor, int]:
raise RuntimeError("No audio I/O backend is available.")
def save(filepath: str,
         src: Tensor,
         sample_rate: int,
         precision: int=16,
         channels_first: bool=True) -> None:
    """Placeholder ``save`` that reports the absence of an audio backend.

    None of the arguments are used.

    Raises:
        RuntimeError: Always.
    """
    message = "No audio I/O backend is available."
    raise RuntimeError(message)
def info(filepath: str) -> None:
    """Placeholder ``info`` that reports the absence of an audio backend.

    ``filepath`` is not used.

    Raises:
        RuntimeError: Always.
    """
    message = "No audio I/O backend is available."
    raise RuntimeError(message)
@_mod_utils.requires_sox()
def info(filepath: str, format: Optional[str]=None) -> AudioMetaData:
    """Fetch the metadata of the audio file at ``filepath``.

    Args:
        filepath: Path of the audio file to inspect.
        format: Optional format hint passed through to ``get_info_file``.
            Defaults to ``None``.

    Returns:
        An ``AudioMetaData`` built from the values ``get_info_file`` returns.

    Raises:
        RuntimeError: Raised by ``_fallback_info`` when ``get_info_file``
            returns ``None``.
    """
    # Call the `get_info_file` binding this module imports directly from
    # `paddlespeech.audio._paddleaudio`, not an attribute chain rooted at a
    # `paddleaudio` name.
    sinfo = get_info_file(filepath, format)
    if sinfo is not None:
        return AudioMetaData(*sinfo)
    return _fallback_info(filepath, format)

@ -11,5 +11,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import fbank
from . import pitch
from .kaldi import fbank
from .kaldi import pitch

@ -13,12 +13,12 @@
# limitations under the License.
from paddlespeech.audio._internal import module_utils
import paddlespeech.audio.ops.paddleaudio.ComputeFbank as ComputeFbank
import paddlespeech.audio.ops.paddleaudio.PitchExtractionOptions as PitchExtractionOptions
import paddlespeech.audio.ops.paddleaudio.FrameExtractionOptions as FrameExtractionOptions
import paddlespeech.audio.ops.paddleaudio.MelBanksOptions as MelBanksOptions
import paddlespeech.audio.ops.paddleaudio.FbankOptions as FbankOptions
import paddlespeech.audio.ops.paddleaudio.ComputeKaldiPitch as ComputeKaldiPitch
from paddlespeech.audio._paddleaudio import ComputeFbank as ComputeFbank
from paddlespeech.audio._paddleaudio import PitchExtractionOptions as PitchExtractionOptions
from paddlespeech.audio._paddleaudio import FrameExtractionOptions as FrameExtractionOptions
from paddlespeech.audio._paddleaudio import MelBanksOptions as MelBanksOptions
from paddlespeech.audio._paddleaudio import FbankOptions as FbankOptions
from paddlespeech.audio._paddleaudio import ComputeKaldiPitch as ComputeKaldiPitch
__all__ = [
'fbank',

@ -16,8 +16,8 @@ import unittest
import numpy as np
import paddle
import paddlespeech.audio.kaldi.fbank as fbank
import paddlespeech.audio.kaldi.pitch as pitch
from paddlespeech.audio.kaldi import fbank as fbank
from paddlespeech.audio.kaldi import pitch as pitch
from kaldiio import ReadHelper
# the groundtruth feats computed in kaldi command below.

Loading…
Cancel
Save