make loading _paddleaudio.so work

3 years ago · cffe555c91
parent 3bb904bda9
commit cffe555c91
7 changed files with 179 additions and 23 deletions
--- a/paddlespeech/audio/init.py
+++ b/paddlespeech/audio/init.py
@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from . import _extension
 from . import compliance
 from . import datasets
 from . import features
@ -18,7 +20,6 @@ from . import functional
 from . import io
 from . import metric
 from . import utils
 from ._ops import ops
 from paddlespeech.audio.backends import get_audio_backend
 from paddlespeech.audio.backends import list_audio_backends
 from paddlespeech.audio.backends import set_audio_backend
@ -30,7 +31,6 @@ __all__ = [
    "functional",
    "features",
    "utils",
    'ops'
    "list_audio_backends",
    "get_audio_backend",
    "set_audio_backend",
--- a/paddlespeech/audio/_extension.py
+++ b/paddlespeech/audio/_extension.py
@ -4,8 +4,69 @@ from pathlib import Path
 from ._internal import module_utils as _mod_utils  # noqa: F401
 _LIB_DIR = Path(__file__) / "lib"
 import contextlib
 import ctypes
 import os
 import sys
 import types
 # Query `hasattr` only once.
 # The flag is True only when `sys` exposes both `getdlopenflags` and
 # `setdlopenflags`; `dl_open_guard` checks it before touching the dlopen flags.
 _SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys,
                                                               'setdlopenflags')
@contextlib.contextmanager
def dl_open_guard():
    """Temporarily add ``RTLD_GLOBAL`` to the interpreter's dlopen flags.

    Context manager meant to wrap the opening of a shared library that
    provides custom operators. For the meaning of the flag see
    https://manpages.debian.org/bullseye/manpages-dev/dlopen.3.en.html

    When ``_SET_GLOBAL_FLAGS`` is false the flags are left untouched and the
    managed body simply runs. Otherwise the previous flags are restored when
    the body finishes, including when it raises.
    """
    if not _SET_GLOBAL_FLAGS:
        yield
        return

    old_flags = sys.getdlopenflags()
    sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL)
    try:
        yield
    finally:
        # Restore unconditionally so a failed load cannot leave RTLD_GLOBAL
        # switched on for every later import in this process.
        sys.setdlopenflags(old_flags)
 def resolve_library_path(path: str) -> str:
    """Return ``path`` in the canonical form produced by ``os.path.realpath``."""
    canonical = os.path.realpath(path)
    return canonical
 class _Ops(types.ModuleType):
    """Module object that loads shared libraries and remembers their paths."""
    #__file__ = '_ops.py'

    def __init__(self):
        super().__init__('paddlespeech.ops')
        # Resolved paths of the libraries loaded through ``load_library``.
        self.loaded_libraries = set()

    def load_library(self, path):
        """Load the shared library at ``path`` into the current process.

        Intended for custom operators: compile the operator together with its
        static registration code into a shared object, then call
        ``paddlespeech.ops.load_library('path/to/libcustom.so')``.

        After loading, the resolved path is added to the
        ``paddlespeech.ops.loaded_libraries`` set, which can be inspected for
        the paths of all libraries loaded through this method.

        Args:
            path (str): A path to a shared library to load.
        """
        real_path = resolve_library_path(path)
        # https://docs.python.org/3/library/ctypes.html?highlight=ctypes#loading-shared-libraries
        # ``ctypes.CDLL`` opens the library inside this process; the guard is
        # held only for the duration of that call.
        with dl_open_guard():
            ctypes.CDLL(real_path)
        self.loaded_libraries.add(real_path)
 _LIB_DIR = Path(__file__).parent / "lib"
 def _get_lib_path(lib: str):
    suffix = "pyd" if os.name == "nt" else "so"
@ -42,9 +103,12 @@ def _load_lib(lib: str) -> bool:
            If a dependency is missing, then users have to install it.
    """
    path = _get_lib_path(lib)
    warnings.warn("lib path is :" + str(path))
    if not path.exists():
        warnings.warn("lib path is not exists:" + str(path))
        return False
-    paddlespeech.audio.ops.load_library(path)
+    #paddlespeech.audio.ops.load_library(path)
    ops.load_library(path)
    return True
@ -56,7 +120,7 @@ def _init_ffmpeg():
    if _FFMPEG_INITIALIZED:
        return
-    if not paddlespeech.audio.ops.paddlleaudio.is_ffmpeg_available():
+    if not paddlespeech.audio.paddlleaudio.is_ffmpeg_available():
        raise RuntimeError(
            "paddlleaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling paddlleaudio."
        )
@ -69,15 +133,15 @@ def _init_ffmpeg():
    import paddllespeech.audio._paddlleaudio_ffmpeg  # noqa
-    paddlespeech.audio.ops.paddlleaudio.ffmpeg_init()
+    paddlespeech.audio.paddlleaudio.ffmpeg_init()
-    if paddlespeech.audio.ops.paddlleaudio.ffmpeg_get_log_level() > 8:
+    if paddlespeech.audio.paddlleaudio.ffmpeg_get_log_level() > 8:
-        paddlespeech.audio.ops.paddlleaudio.ffmpeg_set_log_level(8)
+        paddlespeech.audio.paddlleaudio.ffmpeg_set_log_level(8)
    _FFMPEG_INITIALIZED = True
 def _init_extension():
-    if not _mod_utils.is_module_available("paddlespeech._paddleaudio"):
+    if not _mod_utils.is_module_available("paddlespeech.audio._paddleaudio"):
        warnings.warn("paddlespeech C++ extension is not available.")
        return
@ -96,4 +160,6 @@ def _init_extension():
        pass
 ops = _Ops()
 _init_extension()
--- a/paddlespeech/audio/backends/common.py
+++ b/paddlespeech/audio/backends/common.py
@ -0,0 +1,55 @@
 # code from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py
 class AudioMetaData:
    """Metadata describing an audio stream.

    Adapted from torchaudio, where it is the return type of
    ``torchaudio.info``.

    :ivar int sample_rate: Sample rate
    :ivar int num_frames: The number of frames
    :ivar int num_channels: The number of channels
    :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats,
        or when it cannot be accurately inferred.
    :ivar str encoding: Audio encoding
        The values encoding can take are one of the following:

            * ``PCM_S``: Signed integer linear PCM
            * ``PCM_U``: Unsigned integer linear PCM
            * ``PCM_F``: Floating point linear PCM
            * ``FLAC``: Flac, Free Lossless Audio Codec
            * ``ULAW``: Mu-law
            * ``ALAW``: A-law
            * ``MP3`` : MP3, MPEG-1 Audio Layer III
            * ``VORBIS``: OGG Vorbis
            * ``AMR_WB``: Adaptive Multi-Rate Wideband
            * ``AMR_NB``: Adaptive Multi-Rate Narrowband
            * ``OPUS``: Opus
            * ``HTK``: Single channel 16-bit PCM
            * ``UNKNOWN`` : None of above
    """

    def __init__(
        self,
        sample_rate: int,
        num_frames: int,
        num_channels: int,
        bits_per_sample: int,
        encoding: str,
    ):
        self.sample_rate = sample_rate
        self.num_frames = num_frames
        self.num_channels = num_channels
        self.bits_per_sample = bits_per_sample
        self.encoding = encoding

    def __str__(self):
        """Render every field as ``AudioMetaData(name=value, ...)``."""
        field_names = (
            "sample_rate",
            "num_frames",
            "num_channels",
            "bits_per_sample",
            "encoding",
        )
        rendered = ", ".join(
            f"{name}={getattr(self, name)}" for name in field_names)
        return f"AudioMetaData({rendered})"
--- a/paddlespeech/audio/backends/sox_io_backend.py
+++ b/paddlespeech/audio/backends/sox_io_backend.py
@ -6,9 +6,42 @@ from typing import Tuple
 from typing import Union
 from paddle import Tensor
 from .common import AudioMetaData
 from paddlespeech.audio._internal import module_utils  as _mod_utils
 from paddlespeech.audio._paddleaudio import get_info_file
 from paddlespeech.audio._paddleaudio import get_info_fileobj
 #https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
 def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:
    """Fallback for path-based metadata queries; always raises.

    Args:
        filepath: Path named in the error message.
        format: Accepted for signature compatibility; not used.

    Raises:
        RuntimeError: Always.
    """
    message = "Failed to fetch metadata from {}".format(filepath)
    raise RuntimeError(message)
 def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioMetaData:
    """Fallback for file-object metadata queries; always raises.

    Args:
        fileobj: Object named in the error message.
        format: Accepted for signature compatibility; not used.

    Raises:
        RuntimeError: Always.
    """
    message = "Failed to fetch metadata from {}".format(fileobj)
    raise RuntimeError(message)
 # Note: must comply with TorchScript syntax -- annotations are required and f-strings are not allowed
 def _fail_load(
    filepath: str,
    frame_offset: int = 0,
    num_frames: int = -1,
    normalize: bool = True,
    channels_first: bool = True,
    format: Optional[str] = None,
 ) -> Tuple[Tensor, int]:
    """Fallback for path-based audio loading; always raises.

    Only ``filepath`` is used (in the error message); the remaining
    parameters are accepted but ignored.

    The return annotation uses the ``Tensor`` name imported at the top of
    this module rather than ``paddle.Tensor``, which would require the
    ``paddle`` module itself to be bound when the ``def`` is evaluated.

    Raises:
        RuntimeError: Always.
    """
    # str.format is used on purpose: no f-string here.
    raise RuntimeError("Failed to load audio from {}".format(filepath))
 def _fail_load_fileobj(fileobj, *args, **kwargs):
    """Fallback for file-object audio loading; always raises.

    Args:
        fileobj: Object named in the error message.
        *args: Ignored.
        **kwargs: Ignored.

    Raises:
        RuntimeError: Always.
    """
    message = f"Failed to load audio from {fileobj}"
    raise RuntimeError(message)
 # Fallback hooks. As bound here, each one points at a helper that always
 # raises RuntimeError.
 _fallback_info = _fail_info
 _fallback_info_fileobj = _fail_info_fileobj
 _fallback_load = _fail_load
 # NOTE(review): this name is spelled "filebj" (not "fileobj"); left unchanged
 # because code outside this view may reference it -- confirm before renaming.
 _fallback_load_filebj = _fail_load_fileobj
 def load(
    filepath: Union[str, Path],
    out: Optional[Tensor] = None,
@ -20,10 +53,12 @@ def load(
 ) -> Tuple[Tensor, int]:
    raise RuntimeError("No audio I/O backend is available.")
 def save(
    filepath: str,
    src: Tensor,
    sample_rate: int,
    precision: int = 16,
    channels_first: bool = True,
 ) -> None:
    """Placeholder ``save``; every argument is ignored.

    Raises:
        RuntimeError: Always.
    """
    message = "No audio I/O backend is available."
    raise RuntimeError(message)
-
+@_mod_utils.requires_sox()
-def info(filepath: str) -> None:
+def info(filepath: str, format: Optional[str] = None) -> AudioMetaData:
-    raise RuntimeError("No audio I/O backend is available.")
+    """Return the metadata of the audio file at ``filepath``.
+
+    Args:
+        filepath: Path of the audio file to inspect.
+        format: Optional format hint passed through to ``get_info_file``.
+            Defaults to ``None`` so callers written for the previous
+            ``info(filepath)`` signature keep working.
+
+    Returns:
+        AudioMetaData: Built from the values returned by ``get_info_file``.
+
+    Raises:
+        RuntimeError: Raised by the fallback when ``get_info_file``
+            returns ``None``.
+    """
+    # Call the function imported from paddlespeech.audio._paddleaudio at the
+    # top of this module; the bare ``paddleaudio`` name is not imported here.
+    sinfo = get_info_file(filepath, format)
    if sinfo is not None:
        return AudioMetaData(*sinfo)
    return _fallback_info(filepath, format)
--- a/paddlespeech/audio/kaldi/init.py
+++ b/paddlespeech/audio/kaldi/init.py
@ -11,5 +11,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from . import fbank
+from .kaldi import fbank
-from . import pitch
+from .kaldi import pitch
--- a/paddlespeech/audio/kaldi/kaldi.py
+++ b/paddlespeech/audio/kaldi/kaldi.py
@ -13,12 +13,12 @@
 # limitations under the License.
 from paddlespeech.audio._internal import module_utils 
-import paddlespeech.audio.ops.paddleaudio.ComputeFbank as ComputeFbank
+from paddlespeech.audio._paddleaudio import ComputeFbank as ComputeFbank
-import paddlespeech.audio.ops.paddleaudio.PitchExtractionOptions as PitchExtractionOptions
+from paddlespeech.audio._paddleaudio import PitchExtractionOptions as PitchExtractionOptions
-import paddlespeech.audio.ops.paddleaudio.FrameExtractionOptions as FrameExtractionOptions
+from paddlespeech.audio._paddleaudio import FrameExtractionOptions as FrameExtractionOptions
-import paddlespeech.audio.ops.paddleaudio.MelBanksOptions as MelBanksOptions
+from paddlespeech.audio._paddleaudio import MelBanksOptions as MelBanksOptions
-import paddlespeech.audio.ops.paddleaudio.FbankOptions as FbankOptions
+from paddlespeech.audio._paddleaudio import FbankOptions as FbankOptions
-import paddlespeech.audio.ops.paddleaudio.ComputeKaldiPitch as ComputeKaldiPitch
+from paddlespeech.audio._paddleaudio import ComputeKaldiPitch as ComputeKaldiPitch
 __all__ = [
    'fbank',
--- a/tests/unit/audio/features/test_kaldi_feat.py
+++ b/tests/unit/audio/features/test_kaldi_feat.py
@ -16,8 +16,8 @@ import unittest
 import numpy as np
 import paddle
-import paddlespeech.audio.kaldi.fbank as fbank
+from paddlespeech.audio.kaldi import fbank as fbank
-import paddlespeech.audio.kaldi.pitch as pitch
+from paddlespeech.audio.kaldi import pitch as pitch
 from kaldiio import ReadHelper
 # the groundtruth feats computed in kaldi command below.