diff --git a/paddlespeech/audio/__init__.py b/paddlespeech/audio/__init__.py
index 26da1991f..8a231ae5b 100644
--- a/paddlespeech/audio/__init__.py
+++ b/paddlespeech/audio/__init__.py
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from . import _extension
 from . import compliance
 from . import datasets
 from . import features
@@ -18,7 +20,6 @@ from . import functional
 from . import io
 from . import metric
 from . import utils
-from ._ops import ops
 from paddlespeech.audio.backends import get_audio_backend
 from paddlespeech.audio.backends import list_audio_backends
 from paddlespeech.audio.backends import set_audio_backend
@@ -30,7 +31,6 @@ __all__ = [
     "functional",
     "features",
     "utils",
-    'ops'
     "list_audio_backends",
     "get_audio_backend",
     "set_audio_backend",
diff --git a/paddlespeech/audio/_extension.py b/paddlespeech/audio/_extension.py
index 5629a2826..000fae131 100644
--- a/paddlespeech/audio/_extension.py
+++ b/paddlespeech/audio/_extension.py
@@ -4,8 +4,69 @@ from pathlib import Path
 
 from ._internal import module_utils as _mod_utils  # noqa: F401
 
-_LIB_DIR = Path(__file__) / "lib"
 
+import contextlib
+import ctypes
+import os
+import sys
+import types
+
+# Probed once at import: these `sys` accessors may be absent on some platforms.
+_SET_GLOBAL_FLAGS = all(
+    hasattr(sys, name) for name in ('getdlopenflags', 'setdlopenflags'))
+
+
+@contextlib.contextmanager
+def dl_open_guard():
+    """Enable ``RTLD_GLOBAL`` for libraries opened in the ``with`` body, then restore."""
+    # https://manpages.debian.org/bullseye/manpages-dev/dlopen.3.en.html
+    if _SET_GLOBAL_FLAGS:
+        old_flags = sys.getdlopenflags()
+        sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL)
+    try:
+        yield
+    finally:
+        # Restore the previous flags even when the ``with`` body raised.
+        if _SET_GLOBAL_FLAGS:
+            sys.setdlopenflags(old_flags)
+
+
+def resolve_library_path(path: str) -> str:  # canonical path, symlinks resolved
+    return os.path.realpath(path)
+
+
+class _Ops(types.ModuleType):
+    """Module-like namespace that loads shared libraries and remembers them."""
+
+    def __init__(self):
+        # Name given to the underlying module object.
+        super().__init__('paddlespeech.ops')
+        # Resolved paths of the libraries loaded through `load_library`.
+        self.loaded_libraries = set()
+
+    def load_library(self, path):
+        """Load the shared library at ``path`` into the current process.
+
+        This is how custom operators are loaded dynamically: compile the
+        operator together with its static registration code into a shared
+        object and call ``paddlespeech.ops.load_library('path/to/libcustom.so')``.
+        Opening the library runs its static (global) initialization code.
+
+        Once loaded, the resolved path is added to ``loaded_libraries``, a set
+        that may be inspected for everything loaded through this method.
+
+        Args:
+            path (str): A path to a shared library to load.
+        """
+        resolved = resolve_library_path(path)
+        # The guard adds RTLD_GLOBAL to the dlopen flags where supported, see
+        # https://docs.python.org/3/library/ctypes.html#loading-shared-libraries
+        with dl_open_guard():
+            ctypes.CDLL(resolved)
+        self.loaded_libraries.add(resolved)
+
+
+_LIB_DIR = Path(__file__).parent / "lib"  # the "lib" directory next to this file
 
 def _get_lib_path(lib: str):
     suffix = "pyd" if os.name == "nt" else "so"
@@ -42,9 +103,12 @@ def _load_lib(lib: str) -> bool:
             If a dependency is missing, then users have to install it.
     """
     path = _get_lib_path(lib)
+    warnings.warn("lib path is :" + str(path))
     if not path.exists():
+        warnings.warn("lib path is not exists:" + str(path))
         return False
-    paddlespeech.audio.ops.load_library(path)
+    #paddlespeech.audio.ops.load_library(path)
+    ops.load_library(path)
     return True
 
 
@@ -56,7 +120,7 @@ def _init_ffmpeg():
     if _FFMPEG_INITIALIZED:
         return
 
-    if not paddlespeech.audio.ops.paddlleaudio.is_ffmpeg_available():
+    if not paddlespeech.audio._paddlleaudio.is_ffmpeg_available():
         raise RuntimeError(
             "paddlleaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling paddlleaudio."
         )
@@ -69,15 +133,15 @@ def _init_ffmpeg():
 
     import paddllespeech.audio._paddlleaudio_ffmpeg  # noqa
 
-    paddlespeech.audio.ops.paddlleaudio.ffmpeg_init()
-    if paddlespeech.audio.ops.paddlleaudio.ffmpeg_get_log_level() > 8:
-        paddlespeech.audio.ops.paddlleaudio.ffmpeg_set_log_level(8)
+    paddlespeech.audio._paddlleaudio.ffmpeg_init()
+    if paddlespeech.audio._paddlleaudio.ffmpeg_get_log_level() > 8:
+        paddlespeech.audio._paddlleaudio.ffmpeg_set_log_level(8)
 
     _FFMPEG_INITIALIZED = True
 
 
 def _init_extension():
-    if not _mod_utils.is_module_available("paddlespeech._paddleaudio"):
+    if not _mod_utils.is_module_available("paddlespeech.audio._paddleaudio"):
         warnings.warn("paddlespeech C++ extension is not available.")
         return
 
@@ -96,4 +160,6 @@ def _init_extension():
         pass
 
 
+ops = _Ops()  # created before _init_extension() runs; _load_lib() uses this instance
+
 _init_extension()
diff --git a/paddlespeech/audio/backends/common.py b/paddlespeech/audio/backends/common.py
new file mode 100644
index 000000000..7ccab1d33
--- /dev/null
+++ b/paddlespeech/audio/backends/common.py
@@ -0,0 +1,55 @@
+# code from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py
+
+class AudioMetaData:
+    """Metadata describing an audio file.
+
+    Adapted from torchaudio's ``AudioMetaData``. In this package, instances are
+    created by ``info`` of the ``sox_io`` backend. The class only stores the
+    values it is given; it performs no validation.
+
+    :ivar int sample_rate: Sample rate
+    :ivar int num_frames: The number of frames
+    :ivar int num_channels: The number of channels
+    :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats,
+        or when it cannot be accurately inferred.
+    :ivar str encoding: Audio encoding
+        The values encoding can take are one of the following:
+
+            * ``PCM_S``: Signed integer linear PCM
+            * ``PCM_U``: Unsigned integer linear PCM
+            * ``PCM_F``: Floating point linear PCM
+            * ``FLAC``: Flac, Free Lossless Audio Codec
+            * ``ULAW``: Mu-law
+            * ``ALAW``: A-law
+            * ``MP3`` : MP3, MPEG-1 Audio Layer III
+            * ``VORBIS``: OGG Vorbis
+            * ``AMR_WB``: Adaptive Multi-Rate Wideband
+            * ``AMR_NB``: Adaptive Multi-Rate Narrowband
+            * ``OPUS``: Opus
+            * ``HTK``: Single channel 16-bit PCM
+            * ``UNKNOWN`` : None of above
+
+    ``str()`` of an instance gives a one-line summary of the form
+    ``AudioMetaData(sample_rate=..., ..., encoding=...)``.
+    """
+
+    def __init__(self,
+                 sample_rate: int,
+                 num_frames: int,
+                 num_channels: int,
+                 bits_per_sample: int,
+                 encoding: str):
+        """Store each argument, unchanged, on the attribute of the same name."""
+        self.sample_rate = sample_rate
+        self.num_frames = num_frames
+        self.num_channels = num_channels
+        self.bits_per_sample = bits_per_sample
+        self.encoding = encoding
+
+    def __str__(self):
+        """Return ``AudioMetaData(name=value, ...)`` listing all five fields."""
+        # Same field order as the constructor signature.
+        names = ("sample_rate", "num_frames", "num_channels",
+                 "bits_per_sample", "encoding")
+        shown = ", ".join(f"{name}={getattr(self, name)}" for name in names)
+        return f"AudioMetaData({shown})"
diff --git a/paddlespeech/audio/backends/sox_io_backend.py b/paddlespeech/audio/backends/sox_io_backend.py
index f22222d66..a91220042 100644
--- a/paddlespeech/audio/backends/sox_io_backend.py
+++ b/paddlespeech/audio/backends/sox_io_backend.py
@@ -5,9 +5,41 @@ from typing import Tuple
 from typing import Union
 
 from paddle import Tensor
+from .common import AudioMetaData
+
+from paddlespeech.audio._internal import module_utils  as _mod_utils
+from paddlespeech.audio._paddleaudio import get_info_file
+from paddlespeech.audio._paddleaudio import get_info_fileobj
 
 #https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
 
+def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:  # default `_fallback_info`
+    raise RuntimeError("Failed to fetch metadata from {}".format(filepath))  # `format` is unused
+
+
+def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioMetaData:  # default `_fallback_info_fileobj`
+    raise RuntimeError("Failed to fetch metadata from {}".format(fileobj))  # `format` is unused
+
+
+# Note: needs to comply with TorchScript syntax -- requires annotations and no f-strings
+def _fail_load(
+        filepath: str,
+        frame_offset: int=0,
+        num_frames: int=-1,
+        normalize: bool=True,
+        channels_first: bool=True,
+        format: Optional[str]=None, ) -> Tuple[Tensor, int]:
+    """Default ``_fallback_load`` target: always raises RuntimeError for ``filepath``."""
+    raise RuntimeError("Failed to load audio from {}".format(filepath))
+
+
+def _fail_load_fileobj(fileobj, *args, **kwargs):  # always raises; extra arguments are ignored
+    raise RuntimeError(f"Failed to load audio from {fileobj}")
+
+_fallback_info = _fail_info  # default fallbacks: each target simply raises RuntimeError
+_fallback_info_fileobj = _fail_info_fileobj
+_fallback_load = _fail_load
+_fallback_load_fileobj = _fallback_load_filebj = _fail_load_fileobj  # misspelt alias kept
 
 def load(
         filepath: Union[str, Path],
@@ -19,14 +51,16 @@ def load(
         filetype: Optional[str]=None, ) -> Tuple[Tensor, int]:
     raise RuntimeError("No audio I/O backend is available.")
 
-
-def save(filepath: str,
-         src: Tensor,
-         sample_rate: int,
-         precision: int=16,
-         channels_first: bool=True) -> None:
+def save(filepath: str, 
+         src: Tensor, 
+         sample_rate: int, 
+         precision: int = 16, 
+         channels_first: bool = True) -> None:
     raise RuntimeError("No audio I/O backend is available.")
 
-
-def info(filepath: str) -> None:
-    raise RuntimeError("No audio I/O backend is available.")
+@_mod_utils.requires_sox()
+def info(filepath: str, format: Optional[str]=None) -> AudioMetaData:
+    """Return metadata for ``filepath``; defers to ``_fallback_info`` if none is found."""
+    # Use the `get_info_file` imported above rather than an unbound `paddleaudio` name.
+    sinfo = get_info_file(filepath, format)
+    return _fallback_info(filepath, format) if sinfo is None else AudioMetaData(*sinfo)
diff --git a/paddlespeech/audio/kaldi/__init__.py b/paddlespeech/audio/kaldi/__init__.py
index 2b52ad23d..f951e280a 100644
--- a/paddlespeech/audio/kaldi/__init__.py
+++ b/paddlespeech/audio/kaldi/__init__.py
@@ -11,5 +11,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from . import fbank
-from . import pitch
+from .kaldi import fbank
+from .kaldi import pitch
diff --git a/paddlespeech/audio/kaldi/kaldi.py b/paddlespeech/audio/kaldi/kaldi.py
index 69347ddbb..e8e5693c4 100644
--- a/paddlespeech/audio/kaldi/kaldi.py
+++ b/paddlespeech/audio/kaldi/kaldi.py
@@ -12,13 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from paddlespeech.audio._internal import module_utils
-import paddlespeech.audio.ops.paddleaudio.ComputeFbank as ComputeFbank
-import paddlespeech.audio.ops.paddleaudio.PitchExtractionOptions as PitchExtractionOptions
-import paddlespeech.audio.ops.paddleaudio.FrameExtractionOptions as FrameExtractionOptions
-import paddlespeech.audio.ops.paddleaudio.MelBanksOptions as MelBanksOptions
-import paddlespeech.audio.ops.paddleaudio.FbankOptions as FbankOptions
-import paddlespeech.audio.ops.paddleaudio.ComputeKaldiPitch as ComputeKaldiPitch
+from paddlespeech.audio._internal import module_utils 
 
 __all__ = [
     'fbank',
@@ -55,9 +49,9 @@ def fbank(
         htk_compat: bool=False,
         use_log_fbank: bool=True,
         use_power: bool=True):
-    frame_opts = FrameExtractionOptions()
-    mel_opts = MelBanksOptions()
-    fbank_opts = FbankOptions()
+    frame_opts = paddlespeech.audio._paddleaudio.FrameExtractionOptions()
+    mel_opts = paddlespeech.audio._paddleaudio.MelBanksOptions()
+    fbank_opts = paddlespeech.audio._paddleaudio.FbankOptions()
     frame_opts.samp_freq = samp_freq
     frame_opts.frame_shift_ms = frame_shift_ms
     frame_opts.frame_length_ms = frame_length_ms
@@ -86,7 +80,7 @@ def fbank(
     fbank_opts.htk_compat = htk_compat
     fbank_opts.use_log_fbank = use_log_fbank
     fbank_opts.use_power = use_power
-    feat = ComputeFbank(frame_opts, mel_opts, fbank_opts, wav)
+    feat = paddlespeech.audio._paddleaudio.CopmputeFbank(frame_opts, mel_opts, fbank_opts, wav)
     return feat
 
 
@@ -112,7 +106,7 @@ def pitch(wav,
           recompute_frame: int=500,
           nccf_ballast_online: bool=False,
           snip_edges: bool=True):
-    pitch_opts = PitchExtractionOptions()
+    pitch_opts = paddlespeech.audio._paddleaudio.PitchExtractionOptions()
     pitch_opts.samp_freq = samp_freq
     pitch_opts.frame_shift_ms = frame_shift_ms
     pitch_opts.frame_length_ms = frame_length_ms
@@ -133,5 +127,5 @@ def pitch(wav,
     pitch_opts.recompute_frame = recompute_frame
     pitch_opts.nccf_ballast_online = nccf_ballast_online
     pitch_opts.snip_edges = snip_edges
-    pitch = ComputeKaldiPitch(pitch_opts, wav)
+    pitch = paddlespeech.audio._paddleaudio.ComputeKaldiPitch(pitch_opts, wav)
     return pitch
diff --git a/tests/unit/audio/features/test_kaldi_feat.py b/tests/unit/audio/features/test_kaldi_feat.py
index 031fdfac2..e0ca1fa1d 100644
--- a/tests/unit/audio/features/test_kaldi_feat.py
+++ b/tests/unit/audio/features/test_kaldi_feat.py
@@ -16,8 +16,8 @@ import unittest
 import numpy as np
 import paddle
 
-import paddlespeech.audio.kaldi.fbank as fbank
-import paddlespeech.audio.kaldi.pitch as pitch
+from paddlespeech.audio.kaldi import fbank as fbank
+from paddlespeech.audio.kaldi import pitch as pitch
 from kaldiio import ReadHelper
 
 # the groundtruth feats computed in kaldi command below.