diff --git a/speechx/build.sh b/speechx/build.sh index e0a386752..7655f9635 100755 --- a/speechx/build.sh +++ b/speechx/build.sh @@ -20,4 +20,4 @@ fi mkdir -p build cmake -B build -DBOOST_ROOT:STRING=${boost_SOURCE_DIR} -cmake --build build +cmake --build build -j diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc b/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc index a0fe5b2ac..04530fb9d 100644 --- a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc +++ b/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc @@ -76,11 +76,15 @@ void CTCPrefixBeamSearch::AdvanceDecode( // forward frame by frame std::vector frame_prob; bool flag = decodable->FrameLikelihood(num_frame_decoded_, &frame_prob); - if (flag == false) break; + if (flag == false) { + LOG(INFO) << "decoder advance decode exit." << frame_prob.size(); + break; + } std::vector> likelihood; likelihood.push_back(frame_prob); AdvanceDecoding(likelihood); + VLOG(2) << "num_frame_decoded_: " << num_frame_decoded_; } } @@ -114,7 +118,11 @@ void CTCPrefixBeamSearch::AdvanceDecoding( std::vector topk_score; std::vector topk_index; TopK(logp_t, first_beam_size, &topk_score, &topk_index); - + VLOG(2) << "topk: " << num_frame_decoded_ << " " << *std::max_element(logp_t.begin(), logp_t.end()) << " " << topk_score[0]; + for (int i = 0; i < topk_score.size(); i++){ + VLOG(2) << "topk: " << num_frame_decoded_ << " " << topk_score[i]; + } + // 2. 
token passing for (int i = 0; i < topk_index.size(); ++i) { int id = topk_index[i]; @@ -295,7 +303,18 @@ void CTCPrefixBeamSearch::UpdateOutputs( outputs_.emplace_back(output); } -void CTCPrefixBeamSearch::FinalizeSearch() { UpdateFinalContext(); } +void CTCPrefixBeamSearch::FinalizeSearch() { + UpdateFinalContext(); + + VLOG(2) << "num_frame_decoded_: " << num_frame_decoded_; + int cnt = 0; + for (int i = 0; i < hypotheses_.size(); i ++){ + VLOG(2) << "hyp " << cnt << " len: " << hypotheses_[i].size() << " ctc score: " << likelihood_[i]; + for (int j = 0; j < hypotheses_[i].size(); j ++){ + VLOG(2) << hypotheses_[i][j]; + } + } +} void CTCPrefixBeamSearch::UpdateFinalContext() { if (context_graph_ == nullptr) return; diff --git a/speechx/speechx/frontend/audio/assembler.cc b/speechx/speechx/frontend/audio/assembler.cc index ff1b1f28f..afee3a6a3 100644 --- a/speechx/speechx/frontend/audio/assembler.cc +++ b/speechx/speechx/frontend/audio/assembler.cc @@ -52,15 +52,21 @@ bool Assembler::Compute(Vector* feats) { Vector feature; result = base_extractor_->Read(&feature); if (result == false || feature.Dim() == 0) { - if (IsFinished() == false) return false; - break; + VLOG(1) << "result: " << result << "feature dim: " << feature.Dim(); + if (IsFinished() == false) { + LOG(INFO) << "finished reading feature. 
cache size: " << feature_cache_.size(); + return false; + } else { + LOG(INFO) << "break"; + break; + } } CHECK(feature.Dim() == dim_); + feature_cache_.push(feature); + nframes_ += 1; VLOG(1) << "nframes: " << nframes_; - - feature_cache_.push(feature); } if (feature_cache_.size() < receptive_filed_length_) { @@ -68,8 +74,7 @@ bool Assembler::Compute(Vector* feats) { return false; } - - if (fill_zero_){ + if (fill_zero_) { while (feature_cache_.size() < frame_chunk_size_) { Vector feature(dim_, kaldi::kSetZero); nframes_ += 1; @@ -79,6 +84,7 @@ bool Assembler::Compute(Vector* feats) { int32 this_chunk_size = std::min(static_cast(feature_cache_.size()), frame_chunk_size_); feats->Resize(dim_ * this_chunk_size); + VLOG(1) << "read " << this_chunk_size << " feat."; int32 counter = 0; while (counter < this_chunk_size) { @@ -97,6 +103,7 @@ bool Assembler::Compute(Vector* feats) { counter++; } + CHECK(feature_cache_.size() == cache_size_ ); return result; } diff --git a/speechx/speechx/frontend/audio/feature_cache.h b/speechx/speechx/frontend/audio/feature_cache.h index 09d7f7ebf..b4ed58fff 100644 --- a/speechx/speechx/frontend/audio/feature_cache.h +++ b/speechx/speechx/frontend/audio/feature_cache.h @@ -41,12 +41,14 @@ class FeatureCache : public FrontendInterface { virtual size_t Dim() const { return dim_; } virtual void SetFinished() { + LOG(INFO) << "set finished"; // std::unique_lock lock(mutex_); base_extractor_->SetFinished(); - LOG(INFO) << "set finished"; + // read the last chunk data Compute(); // ready_feed_condition_.notify_one(); + LOG(INFO) << "compute last feats done."; } virtual bool IsFinished() const { return base_extractor_->IsFinished(); } diff --git a/speechx/speechx/nnet/decodable.cc b/speechx/speechx/nnet/decodable.cc index 9bad8ed45..6956a2cb8 100644 --- a/speechx/speechx/nnet/decodable.cc +++ b/speechx/speechx/nnet/decodable.cc @@ -36,8 +36,6 @@ void Decodable::Acceptlikelihood(const Matrix& likelihood) { frames_ready_ += likelihood.NumRows(); 
} -// Decodable::Init(DecodableConfig config) { -//} // return the size of frame have computed. int32 Decodable::NumFramesReady() const { return frames_ready_; } @@ -70,9 +68,10 @@ bool Decodable::AdvanceChunk() { Vector features; if (frontend_ == NULL || frontend_->Read(&features) == false) { // no feat or frontend_ not init. + VLOG(1) << "decodable exit;"; return false; } - VLOG(2) << "Forward with " << features.Dim() << " frames."; + VLOG(2) << "Forward in " << features.Dim() / frontend_->Dim() << " feats."; // forward feats NnetOut out; @@ -80,6 +79,7 @@ bool Decodable::AdvanceChunk() { int32& vocab_dim = out.vocab_dim; Vector& logprobs = out.logprobs; + VLOG(2) << "Forward out " << logprobs.Dim() / vocab_dim << " decoder frames."; // cache nnet outupts nnet_out_cache_.Resize(logprobs.Dim() / vocab_dim, vocab_dim); nnet_out_cache_.CopyRowsFromVec(logprobs); @@ -114,15 +114,20 @@ bool Decodable::AdvanceChunk(kaldi::Vector* logprobs, // read one frame likelihood bool Decodable::FrameLikelihood(int32 frame, vector* likelihood) { if (EnsureFrameHaveComputed(frame) == false) { + LOG(INFO) << "framelikehood exit."; return false; } + int nrows = nnet_out_cache_.NumRows(); + CHECK(nrows == (frames_ready_ - frame_offset_)); int vocab_size = nnet_out_cache_.NumCols(); likelihood->resize(vocab_size); for (int32 idx = 0; idx < vocab_size; ++idx) { (*likelihood)[idx] = nnet_out_cache_(frame - frame_offset_, idx) * acoustic_scale_; + + VLOG(4) << "nnet out: " << frame << " offset:" << frame_offset_ << " " << nnet_out_cache_.NumRows() << " logprob: " << nnet_out_cache_(frame - frame_offset_, idx); } return true; } diff --git a/speechx/speechx/nnet/u2_nnet.cc b/speechx/speechx/nnet/u2_nnet.cc index 63a8a793a..07e2dde2a 100644 --- a/speechx/speechx/nnet/u2_nnet.cc +++ b/speechx/speechx/nnet/u2_nnet.cc @@ -440,6 +440,7 @@ void U2Nnet::AttentionRescoring(const std::vector>& hyps, max_hyps_len = std::max(max_hyps_len, len); hyps_len_ptr[i] = static_cast(len); } + VLOG(2) << 
"max_hyps_len: " << max_hyps_len; paddle::Tensor hyps_tensor = paddle::full({num_hyps, max_hyps_len}, eos_, paddle::DataType::INT64); @@ -625,8 +626,8 @@ void U2Nnet::AttentionRescoring(const std::vector>& hyps, // combinded left-to-right and right-to-lfet score (*rescoring_score)[i] = score * (1 - reverse_weight) + r_score * reverse_weight; - VLOG(1) << "hyp " << i << " score: " << score << " r_score: " << r_score - << " reverse_weight: " << reverse_weight; + VLOG(1) << "hyp " << i << " " << hyp.size() << " score: " << score << " r_score: " << r_score + << " reverse_weight: " << reverse_weight << " final score: " << (*rescoring_score)[i]; } } diff --git a/speechx/speechx/recognizer/u2_recognizer.cc b/speechx/speechx/recognizer/u2_recognizer.cc index 75834aa5d..b4a1257b6 100644 --- a/speechx/speechx/recognizer/u2_recognizer.cc +++ b/speechx/speechx/recognizer/u2_recognizer.cc @@ -52,7 +52,6 @@ void U2Recognizer::Reset() { num_frames_ = 0; result_.clear(); - feature_pipeline_->Reset(); decodable_->Reset(); decoder_->Reset(); } @@ -62,7 +61,6 @@ void U2Recognizer::ResetContinuousDecoding() { num_frames_ = 0; result_.clear(); - feature_pipeline_->Reset(); decodable_->Reset(); decoder_->Reset(); } @@ -192,10 +190,12 @@ void U2Recognizer::AttentionRescoring() { // combine ctc score and rescoring score for (size_t i = 0; i < num_hyps; i++) { VLOG(1) << "hyp " << i << " rescoring_score: " << rescoring_score[i] - << " ctc_score: " << result_[i].score; + << " ctc_score: " << result_[i].score << " rescoring_weight: " << opts_.decoder_opts.rescoring_weight << " ctc_weight: " << opts_.decoder_opts.ctc_weight; result_[i].score = opts_.decoder_opts.rescoring_weight * rescoring_score[i] + opts_.decoder_opts.ctc_weight * result_[i].score; + + VLOG(1) << "hyp: " << result_[0].sentence << " score: " << result_[0].score; } std::sort(result_.begin(), result_.end(), DecodeResult::CompareFunc); diff --git a/speechx/speechx/recognizer/u2_recognizer_main.cc 
b/speechx/speechx/recognizer/u2_recognizer_main.cc index 2375586ea..bfb37fb8e 100644 --- a/speechx/speechx/recognizer/u2_recognizer_main.cc +++ b/speechx/speechx/recognizer/u2_recognizer_main.cc @@ -62,6 +62,7 @@ int main(int argc, char* argv[]) { LOG(INFO) << "wav len (sample): " << tot_samples; int sample_offset = 0; + int cnt = 0; while (sample_offset < tot_samples) { int cur_chunk_size = std::min(chunk_sample_size, tot_samples - sample_offset); @@ -77,12 +78,14 @@ recognizer.SetFinished(); } recognizer.Decode(); - LOG(INFO) << "Pratial result: " << recognizer.GetPartialResult(); + LOG(INFO) << "Partial result: " << cnt << " " << recognizer.GetPartialResult(); // no overlap sample_offset += cur_chunk_size; + cnt++; } CHECK(sample_offset == tot_samples); + VLOG(1) << "num decode: " << cnt; // recognizer.SetFinished();