diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0d260dd3d..57d806e16 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,7 +57,7 @@ include(openblas)
 
 # packages
 find_package(Python3 COMPONENTS Interpreter Development)
-find_package(pybind11 CONFIG)
+find_package(pybind11 CONFIG REQUIRED)
 
 
 # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O0 -Wall -g")
diff --git a/cmake/summary.cmake b/cmake/summary.cmake
index 67e8be0a9..f04d44698 100644
--- a/cmake/summary.cmake
+++ b/cmake/summary.cmake
@@ -37,5 +37,9 @@ function (onnx_print_configuration_summary)
   message(STATUS "    Python executable     : ${Python_EXECUTABLE}")
   message(STATUS "    Python includes       : ${Python_INCLUDE_DIR}")
   message(STATUS "    Python libraries      : ${Python_LIBRARY}")
-
+  message(STATUS "  PYBIND11                  : ${pybind11_FOUND}")
+  message(STATUS "    Pybind11 version        : ${pybind11_VERSION}")
+  message(STATUS "    Pybind11 include        : ${pybind11_INCLUDE_DIR}")
+  message(STATUS "    Pybind11 includes       : ${pybind11_INCLUDE_DIRS}")
+  message(STATUS "    Pybind11 libraries      : ${pybind11_LIBRARIES}")
 endfunction()
\ No newline at end of file
diff --git a/paddlespeech/audio/_internal/module_utils.py b/paddlespeech/audio/_internal/module_utils.py
index 919ba852d..ca1ba4b84 100644
--- a/paddlespeech/audio/_internal/module_utils.py
+++ b/paddlespeech/audio/_internal/module_utils.py
@@ -5,6 +5,7 @@ from typing import Optional
 
 #code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py
 
+
 def is_module_available(*modules: str) -> bool:
     r"""Returns if a top-level module with :attr:`name` exists *without**
     importing it. This is generally safer than try-catch block around a
diff --git a/paddlespeech/audio/backends/no_backend.py b/paddlespeech/audio/backends/no_backend.py
index d391f7aaf..157536f46 100644
--- a/paddlespeech/audio/backends/no_backend.py
+++ b/paddlespeech/audio/backends/no_backend.py
@@ -8,21 +8,25 @@ from paddle import Tensor
 
 #code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py
 
+
 def load(
-    filepath: Union[str, Path],
-    out: Optional[Tensor] = None,
-    normalization: Union[bool, float, Callable] = True,
-    channels_first: bool = True,
-    num_frames: int = 0,
-    offset: int = 0,
-    filetype: Optional[str] = None,
-) -> Tuple[Tensor, int]:
+        filepath: Union[str, Path],
+        out: Optional[Tensor]=None,
+        normalization: Union[bool, float, Callable]=True,
+        channels_first: bool=True,
+        num_frames: int=0,
+        offset: int=0,
+        filetype: Optional[str]=None, ) -> Tuple[Tensor, int]:
     raise RuntimeError("No audio I/O backend is available.")
 
 
-def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
+def save(filepath: str,
+         src: Tensor,
+         sample_rate: int,
+         precision: int=16,
+         channels_first: bool=True) -> None:
     raise RuntimeError("No audio I/O backend is available.")
 
 
 def info(filepath: str) -> None:
-    raise RuntimeError("No audio I/O backend is available.")
\ No newline at end of file
+    raise RuntimeError("No audio I/O backend is available.")
diff --git a/paddlespeech/audio/backends/sox_io_backend.py b/paddlespeech/audio/backends/sox_io_backend.py
index 53c2ad0df..a91220042 100644
--- a/paddlespeech/audio/backends/sox_io_backend.py
+++ b/paddlespeech/audio/backends/sox_io_backend.py
@@ -1,4 +1,3 @@
-
 from pathlib import Path
 from typing import Callable
 from typing import Optional
@@ -43,17 +42,20 @@ _fallback_load = _fail_load
 _fallback_load_filebj = _fail_load_fileobj
 
 def load(
-    filepath: Union[str, Path],
-    out: Optional[Tensor] = None,
-    normalization: Union[bool, float, Callable] = True,
-    channels_first: bool = True,
-    num_frames: int = 0,
-    offset: int = 0,
-    filetype: Optional[str] = None,
-) -> Tuple[Tensor, int]:
+        filepath: Union[str, Path],
+        out: Optional[Tensor]=None,
+        normalization: Union[bool, float, Callable]=True,
+        channels_first: bool=True,
+        num_frames: int=0,
+        offset: int=0,
+        filetype: Optional[str]=None, ) -> Tuple[Tensor, int]:
     raise RuntimeError("No audio I/O backend is available.")
 
-def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
+def save(filepath: str,
+         src: Tensor,
+         sample_rate: int,
+         precision: int=16,
+         channels_first: bool=True) -> None:
     raise RuntimeError("No audio I/O backend is available.")
 
 @_mod_utils.requires_sox()
diff --git a/paddlespeech/audio/backends/utils.py b/paddlespeech/audio/backends/utils.py
index 834fbca0a..9ea2eaca7 100644
--- a/paddlespeech/audio/backends/utils.py
+++ b/paddlespeech/audio/backends/utils.py
@@ -40,7 +40,8 @@ def set_audio_backend(backend: Optional[str]):
             of the system. If ``None`` is provided the  current backend is unassigned.
     """
     if backend is not None and backend not in list_audio_backends():
-        raise RuntimeError(f'Backend "{backend}" is not one of ' f"available backends: {list_audio_backends()}.")
+        raise RuntimeError(f'Backend "{backend}" is not one of '
+                           f"available backends: {list_audio_backends()}.")
 
     if backend is None:
         module = no_backend
@@ -76,6 +77,7 @@ def _init_audio_backend():
         warnings.warn("No audio backend is available.")
         set_audio_backend(None)
 
+
 def get_audio_backend() -> Optional[str]:
     """Get the name of the current backend
 
@@ -88,4 +90,4 @@ def get_audio_backend() -> Optional[str]:
         return "sox_io"
     if paddlespeech.audio.load == soundfile_backend.load:
         return "soundfile"
-    raise ValueError("Unknown backend.")
\ No newline at end of file
+    raise ValueError("Unknown backend.")
diff --git a/paddlespeech/audio/kaldi/kaldi.py b/paddlespeech/audio/kaldi/kaldi.py
index 4f1ad8475..40c6bda91 100644
--- a/paddlespeech/audio/kaldi/kaldi.py
+++ b/paddlespeech/audio/kaldi/kaldi.py
@@ -27,37 +27,38 @@ __all__ = [
 
 
 @module_utils.requires_kaldi()
-def fbank(wav,
-          samp_freq: int=16000,
-          frame_shift_ms: float=10.0,
-          frame_length_ms: float=25.0,
-          dither: float=0.0,
-          preemph_coeff: float=0.97,
-          remove_dc_offset: bool=True,
-          window_type: str='povey',
-          round_to_power_of_two: bool=True,
-          blackman_coeff: float=0.42,
-          snip_edges: bool=True,
-          allow_downsample: bool=False,
-          allow_upsample: bool=False,
-          max_feature_vectors: int=-1,
-          num_bins: int=23,
-          low_freq: float=20,
-          high_freq: float=0,
-          vtln_low: float=100,
-          vtln_high: float=-500,
-          debug_mel: bool=False,
-          htk_mode: bool=False,
-          use_energy: bool=False, # fbank opts
-          energy_floor: float=0.0,
-          raw_energy: bool=True,
-          htk_compat: bool=False,
-          use_log_fbank: bool=True,
-          use_power: bool=True):
+def fbank(
+        wav,
+        samp_freq: int=16000,
+        frame_shift_ms: float=10.0,
+        frame_length_ms: float=25.0,
+        dither: float=0.0,
+        preemph_coeff: float=0.97,
+        remove_dc_offset: bool=True,
+        window_type: str='povey',
+        round_to_power_of_two: bool=True,
+        blackman_coeff: float=0.42,
+        snip_edges: bool=True,
+        allow_downsample: bool=False,
+        allow_upsample: bool=False,
+        max_feature_vectors: int=-1,
+        num_bins: int=23,
+        low_freq: float=20,
+        high_freq: float=0,
+        vtln_low: float=100,
+        vtln_high: float=-500,
+        debug_mel: bool=False,
+        htk_mode: bool=False,
+        use_energy: bool=False,  # fbank opts
+        energy_floor: float=0.0,
+        raw_energy: bool=True,
+        htk_compat: bool=False,
+        use_log_fbank: bool=True,
+        use_power: bool=True):
     frame_opts = FrameExtractionOptions()
     mel_opts = MelBanksOptions()
     fbank_opts = FbankOptions()
-    frame_opts.samp_freq = samp_freq  
+    frame_opts.samp_freq = samp_freq
     frame_opts.frame_shift_ms = frame_shift_ms
     frame_opts.frame_length_ms = frame_length_ms
     frame_opts.dither = dither
@@ -71,7 +72,7 @@ def fbank(wav,
     frame_opts.allow_upsample = allow_upsample
     frame_opts.max_feature_vectors = max_feature_vectors
 
-    mel_opts.num_bins = num_bins  
+    mel_opts.num_bins = num_bins
     mel_opts.low_freq = low_freq
     mel_opts.high_freq = high_freq
     mel_opts.vtln_low = vtln_low
@@ -79,7 +80,7 @@ def fbank(wav,
     mel_opts.debug_mel = debug_mel
     mel_opts.htk_mode = htk_mode
 
-    fbank_opts.use_energy = use_energy  
+    fbank_opts.use_energy = use_energy
     fbank_opts.energy_floor = energy_floor
     fbank_opts.raw_energy = raw_energy
     fbank_opts.htk_compat = htk_compat
@@ -88,6 +89,7 @@ def fbank(wav,
     feat = ComputeFbank(frame_opts, mel_opts, fbank_opts, wav)
     return feat
 
+
 @module_utils.requires_kaldi()
 def pitch(wav,
           samp_freq: int=16000,
@@ -114,7 +116,7 @@ def pitch(wav,
     pitch_opts.samp_freq = samp_freq
     pitch_opts.frame_shift_ms = frame_shift_ms
     pitch_opts.frame_length_ms = frame_length_ms
-    pitch_opts.preemph_coeff = preemph_coeff 
+    pitch_opts.preemph_coeff = preemph_coeff
     pitch_opts.min_f0 = min_f0
     pitch_opts.max_f0 = max_f0
     pitch_opts.soft_min_f0 = soft_min_f0
diff --git a/paddlespeech/audio/src/CMakeLists.txt b/paddlespeech/audio/src/CMakeLists.txt
index 4249e04e2..eea07f637 100644
--- a/paddlespeech/audio/src/CMakeLists.txt
+++ b/paddlespeech/audio/src/CMakeLists.txt
@@ -105,7 +105,7 @@ function(define_extension name sources include_dirs libraries definitions)
   add_library(${name} SHARED ${sources})
   target_compile_definitions(${name} PRIVATE "${definitions}")
   target_include_directories(
-    ${name} PRIVATE ${PROJECT_SOURCE_DIR} ${Python_INCLUDE_DIR} ${include_dirs})
+    ${name} PRIVATE ${PROJECT_SOURCE_DIR} ${Python_INCLUDE_DIR} ${pybind11_INCLUDE_DIR} ${include_dirs})
   target_link_libraries(
     ${name}
     ${libraries}
diff --git a/paddlespeech/audio/src/pybind/kaldi/feature_common.h b/paddlespeech/audio/src/pybind/kaldi/feature_common.h
index dbac4ceac..05522bb7e 100644
--- a/paddlespeech/audio/src/pybind/kaldi/feature_common.h
+++ b/paddlespeech/audio/src/pybind/kaldi/feature_common.h
@@ -14,8 +14,8 @@
 
 #pragma once
 
-#include <pybind11/numpy.h>
-#include <pybind11/pybind11.h>
+#include "pybind11/pybind11.h"
+#include "pybind11/numpy.h"
 #include "feat/feature-window.h"
 
 namespace paddleaudio {