diff --git a/audio/README.md b/audio/README.md
index f336ac9ae..bfd8625f0 100644
--- a/audio/README.md
+++ b/audio/README.md
@@ -29,7 +29,6 @@ MAC：test build whl envrioment：
 * gcc/g++/gfortran 12.2.0
 * cpu Intel Xeon E5 x86_64
 
-
 Windows：
 not support： paddleaudio C++ extension lib (sox io, kaldi native fbank)
-python setup.py bdist_wheel
\ No newline at end of file
+python setup.py bdist_wheel
diff --git a/audio/paddleaudio/src/pybind/pybind.cpp b/audio/paddleaudio/src/pybind/pybind.cpp
index c4dfa8d51..692e80995 100644
--- a/audio/paddleaudio/src/pybind/pybind.cpp
+++ b/audio/paddleaudio/src/pybind/pybind.cpp
@@ -1,7 +1,9 @@
 // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 
+#ifdef INCLUDE_KALDI
 #include "paddleaudio/src/pybind/kaldi/kaldi_feature.h"
 #include "paddleaudio/third_party/kaldi/feat/feature-fbank.h"
+#endif
 
 #ifdef INCLUDE_SOX
 #include "paddleaudio/src/pybind/sox/io.h"
diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py
index 016087d68..31defbbaf 100644
--- a/paddlespeech/s2t/models/u2_st/u2_st.py
+++ b/paddlespeech/s2t/models/u2_st/u2_st.py
@@ -24,9 +24,9 @@ from typing import Tuple
 import paddle
 from paddle import jit
 from paddle import nn
-from paddleaudio.utils.tensor_utils import add_sos_eos
-from paddleaudio.utils.tensor_utils import th_accuracy
 
+from paddlespeech.audio.utils.tensor_utils import add_sos_eos
+from paddlespeech.audio.utils.tensor_utils import th_accuracy
 from paddlespeech.s2t.frontend.utility import IGNORE_ID
 from paddlespeech.s2t.frontend.utility import load_cmvn
 from paddlespeech.s2t.modules.cmvn import GlobalCMVN
diff --git a/paddlespeech/s2t/models/whisper/whipser.py b/paddlespeech/s2t/models/whisper/whipser.py
index ba9983338..63cafbdb7 100644
--- a/paddlespeech/s2t/models/whisper/whipser.py
+++ b/paddlespeech/s2t/models/whisper/whipser.py
@@ -16,7 +16,6 @@ from typing import Union
 
 import numpy as np
 import paddle
-import paddle.fluid as fluid
 import paddle.nn.functional as F
 import soundfile
 import tqdm
@@ -231,8 +230,8 @@ class TextDecoder(nn.Layer):
         ])
         self.ln = LayerNorm(n_state)
 
-        mask = fluid.layers.fill_constant(
-            shape=[n_ctx, n_state], value=-np.inf, dtype='float32')
+        mask = paddle.full(
+            shape=[n_ctx, n_state], fill_value=-np.inf, dtype='float32')
         mask = paddle.triu(mask, diagonal=1)
         self.register_buffer("mask", mask, persistable=False)
 
diff --git a/speechx/build.sh b/speechx/build.sh
index e0a386752..7655f9635 100755
--- a/speechx/build.sh
+++ b/speechx/build.sh
@@ -20,4 +20,4 @@ fi
 mkdir -p build
 
 cmake -B build -DBOOST_ROOT:STRING=${boost_SOURCE_DIR}
-cmake --build build
+cmake --build build -j
diff --git a/speechx/examples/u2pp_ol/wenetspeech/RESULTS.md b/speechx/examples/u2pp_ol/wenetspeech/RESULTS.md
index 09584fd57..ef88357ee 100644
--- a/speechx/examples/u2pp_ol/wenetspeech/RESULTS.md
+++ b/speechx/examples/u2pp_ol/wenetspeech/RESULTS.md
@@ -6,8 +6,11 @@
 
 > Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz, support `avx512_vnni`
 > RTF with feature and decoder which is more end to end.
+
 ### FP32
 
+`local/recognizer.sh`
+
 #### CER
 
 ```
@@ -27,6 +30,8 @@ I1027 10:52:38.662876 51665 u2_recognizer_main.cc:123] RTF is: 0.309318
 
 ### INT8
 
+`local/recognizer_quant.sh`
+
 > RTF relative improve 12.8%, which count feature and decoder time.
 > Test under Paddle commit c331e2ce2031d68a553bc9469a07c30d718438f3  
 
@@ -46,3 +51,17 @@ I1110 09:59:52.551712 37249 u2_recognizer_main.cc:122] total wav duration is: 36
 I1110 09:59:52.551717 37249 u2_recognizer_main.cc:123] total decode cost:9737.63 sec
 I1110 09:59:52.551723 37249 u2_recognizer_main.cc:124] RTF is: 0.269674
 ```
+
+### CTC Prefix Beam Search
+
+`local/decode.sh`
+
+#### CER
+
+```
+Overall -> 6.74 % N=104765 C=98106 S=6516 D=143 I=401
+Mandarin -> 6.74 % N=104762 C=98106 S=6513 D=143 I=401
+English -> 0.00 % N=0 C=0 S=0 D=0 I=0
+Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
+
+```