rm ds2 && rm boost

3 years ago · f37f34d3ce
parent 0547d7961e
commit f37f34d3ce
32 changed files with 1265 additions and 3277 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -57,13 +57,13 @@ repos:
        entry: bash .pre-commit-hooks/clang-format.hook -i
        language: system
        files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$
-        exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h|\.hpp|\.py)$ 
+        exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders|speechx/speechx/common/utils).*(\.cpp|\.cc|\.h|\.hpp|\.py)$ 
    -   id: cpplint
        name: cpplint
        description: Static code analysis of C/C++ files
        language: python
        files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$
-        exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h|\.hpp|\.py)$ 
+        exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders|speechx/speechx/common/utils).*(\.cpp|\.cc|\.h|\.hpp|\.py)$ 
        entry: cpplint --filter=-build,-whitespace,+whitespace/comma,-whitespace/indent
 -   repo: https://github.com/asottile/reorder_python_imports
    rev: v2.4.0
--- a/speechx/CMakeLists.txt
+++ b/speechx/CMakeLists.txt
@@ -44,9 +44,6 @@ option(TEST_DEBUG "option for debug" OFF)
 option(USE_PROFILING "enable c++ profling" OFF)
 option(WITH_TESTING "unit test" ON)

-option(USING_U2  "compile u2 model." ON)
-option(USING_DS2 "compile with ds2 model." OFF)
-
 option(USING_GPU "u2 compute on GPU." OFF)

 ###############################################################################
@@ -56,21 +53,6 @@ include(gflags)

 include(glog)

-# boost
-# include(boost) # not work
-set(boost_SOURCE_DIR ${fc_patch}/boost-src)
-set(BOOST_ROOT ${boost_SOURCE_DIR})
-include_directories(${boost_SOURCE_DIR})
-link_directories(${boost_SOURCE_DIR}/stage/lib)
-
-# Eigen
-include(eigen)
-find_package(Eigen3 REQUIRED)
-
-# Kenlm
-include(kenlm)
-add_dependencies(kenlm eigen boost)
-
 #openblas
 include(openblas)

--- a/speechx/build.sh
+++ b/speechx/build.sh
@@ -4,20 +4,5 @@ set -xe
 # the build script had verified in the paddlepaddle docker image.
 # please follow the instruction below to install PaddlePaddle image.
 # https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html 
-boost_SOURCE_DIR=$PWD/fc_patch/boost-src
-if [ ! -d ${boost_SOURCE_DIR} ]; then wget -c https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.tar.gz 
-  tar xzfv boost_1_75_0.tar.gz
-  mkdir -p $PWD/fc_patch
-  mv boost_1_75_0 ${boost_SOURCE_DIR} 
-  cd ${boost_SOURCE_DIR}
-  bash ./bootstrap.sh
-  ./b2
-  cd -
-  echo -e "\n"
-fi
-
-#rm -rf build
-mkdir -p build
-
-cmake -B build -DBOOST_ROOT:STRING=${boost_SOURCE_DIR}
+cmake -B build
 cmake --build build -j
--- a/speechx/examples/u2pp_ol/wenetspeech/path.sh
+++ b/speechx/examples/u2pp_ol/wenetspeech/path.sh
@@ -3,7 +3,7 @@
 unset GREP_OPTIONS

 SPEECHX_ROOT=$PWD/../../../
-SPEECHX_BUILD=$SPEECHX_ROOT/build/speechx
+SPEECHX_BUILD=$SPEECHX_ROOT/build/speechx/asr

 SPEECHX_TOOLS=$SPEECHX_ROOT/tools
 TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin
@@ -12,7 +12,7 @@ TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin

 export LC_AL=C

-export PATH=$PATH:$TOOLS_BIN:$SPEECHX_BUILD/nnet:$SPEECHX_BUILD/decoder:$SPEECHX_BUILD/frontend/audio:$SPEECHX_BUILD/recognizer
+export PATH=$PATH:$TOOLS_BIN:$SPEECHX_BUILD/nnet:$SPEECHX_BUILD/decoder:$SPEECHX_BUILD/../common/frontend/audio:$SPEECHX_BUILD/recognizer

 PADDLE_LIB_PATH=$(python -c "import os; import paddle; include_dir=paddle.sysconfig.get_include(); paddle_dir=os.path.split(include_dir)[0]; libs_dir=os.path.join(paddle_dir, 'libs'); fluid_dir=os.path.join(paddle_dir, 'fluid'); out=':'.join([libs_dir, fluid_dir]); print(out);")
 export LD_LIBRARY_PATH=$PADDLE_LIB_PATH:$LD_LIBRARY_PATH
--- a/speechx/speechx/asr/decoder/CMakeLists.txt
+++ b/speechx/speechx/asr/decoder/CMakeLists.txt
@@ -1,43 +1,12 @@
-include_directories(${CMAKE_CURRENT_SOURCE_DIR/ctc_decoders})
-
 set(srcs)
-
-if (USING_DS2)
-list(APPEND srcs
-  ctc_decoders/decoder_utils.cpp
-  ctc_decoders/path_trie.cpp
-  ctc_decoders/scorer.cpp
-  ctc_beam_search_decoder.cc
-  ctc_tlg_decoder.cc
-)
-endif()
-
-if (USING_U2)
 list(APPEND srcs
  ctc_prefix_beam_search_decoder.cc
 )
-endif()

 add_library(decoder STATIC ${srcs})
-target_link_libraries(decoder PUBLIC kenlm utils fst frontend nnet kaldi-decoder)
+target_link_libraries(decoder PUBLIC utils fst frontend nnet kaldi-decoder)

 # test
-if (USING_DS2)
-  set(BINS 
-    ctc_beam_search_decoder_main
-    nnet_logprob_decoder_main
-    ctc_tlg_decoder_main
-  )
-
-  foreach(bin_name IN LISTS BINS)
-    add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
-    target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-    target_link_libraries(${bin_name} PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util ${DEPS})
-  endforeach()
-endif()
-
-
-if (USING_U2)
 set(TEST_BINS 
  ctc_prefix_beam_search_decoder_main
 )
@@ -51,5 +20,3 @@ if (USING_U2)
  target_link_libraries(${bin_name}  ${PYTHON_LIBRARIES} ${PADDLE_LINK_FLAGS})
 endforeach()

-endif()
-
--- a/speechx/speechx/asr/decoder/ctc_beam_search_decoder.cc
+++ b/speechx/speechx/asr/decoder/ctc_beam_search_decoder.cc
@@ -1,313 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "decoder/ctc_beam_search_decoder.h"
-
-#include "base/common.h"
-#include "decoder/ctc_decoders/decoder_utils.h"
-#include "utils/file_utils.h"
-
-namespace ppspeech {
-
-using std::vector;
-using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;
-
-CTCBeamSearch::CTCBeamSearch(const CTCBeamSearchOptions& opts)
-    : opts_(opts), init_ext_scorer_(nullptr), space_id_(-1), root_(nullptr) {
-    LOG(INFO) << "dict path: " << opts_.dict_file;
-    if (!ReadFileToVector(opts_.dict_file, &vocabulary_)) {
-        LOG(INFO) << "load the dict failed";
-    }
-    LOG(INFO) << "read the vocabulary success, dict size: "
-              << vocabulary_.size();
-
-    LOG(INFO) << "language model path: " << opts_.lm_path;
-    if (opts_.lm_path != "") {
-        init_ext_scorer_ = std::make_shared<Scorer>(
-            opts_.alpha, opts_.beta, opts_.lm_path, vocabulary_);
-    }
-
-    CHECK_EQ(opts_.blank, 0);
-
-    auto it = std::find(vocabulary_.begin(), vocabulary_.end(), " ");
-    space_id_ = it - vocabulary_.begin();
-    // if no space in vocabulary
-    if (static_cast<size_t>(space_id_) >= vocabulary_.size()) {
-        space_id_ = -2;
-    }
-}
-
-void CTCBeamSearch::Reset() {
-    // num_frame_decoded_ = 0;
-    // ResetPrefixes();
-    InitDecoder();
-}
-
-void CTCBeamSearch::InitDecoder() {
-    num_frame_decoded_ = 0;
-    // ResetPrefixes();
-    prefixes_.clear();
-
-    root_ = std::make_shared<PathTrie>();
-    root_->score = root_->log_prob_b_prev = 0.0;
-    prefixes_.push_back(root_.get());
-    if (init_ext_scorer_ != nullptr &&
-        !init_ext_scorer_->is_character_based()) {
-        auto fst_dict =
-            static_cast<fst::StdVectorFst*>(init_ext_scorer_->dictionary);
-        fst::StdVectorFst* dict_ptr = fst_dict->Copy(true);
-        root_->set_dictionary(dict_ptr);
-
-        auto matcher = std::make_shared<FSTMATCH>(*dict_ptr, fst::MATCH_INPUT);
-        root_->set_matcher(matcher);
-    }
-}
-
-void CTCBeamSearch::Decode(
-    std::shared_ptr<kaldi::DecodableInterface> decodable) {
-    return;
-}
-
-// todo rename, refactor
-void CTCBeamSearch::AdvanceDecode(
-    const std::shared_ptr<kaldi::DecodableInterface>& decodable) {
-    while (1) {
-        vector<vector<BaseFloat>> likelihood;
-        vector<BaseFloat> frame_prob;
-        bool flag = decodable->FrameLikelihood(num_frame_decoded_, &frame_prob);
-        if (flag == false) break;
-        likelihood.push_back(frame_prob);
-        AdvanceDecoding(likelihood);
-    }
-}
-
-void CTCBeamSearch::ResetPrefixes() {
-    for (size_t i = 0; i < prefixes_.size(); i++) {
-        if (prefixes_[i] != nullptr) {
-            delete prefixes_[i];
-            prefixes_[i] = nullptr;
-        }
-    }
-    prefixes_.clear();
-}
-
-int CTCBeamSearch::DecodeLikelihoods(const vector<vector<float>>& probs,
-                                     const vector<string>& nbest_words) {
-    kaldi::Timer timer;
-    AdvanceDecoding(probs);
-    LOG(INFO) << "ctc decoding elapsed time(s) "
-              << static_cast<float>(timer.Elapsed()) / 1000.0f;
-    return 0;
-}
-
-vector<std::pair<double, string>> CTCBeamSearch::GetNBestPath(int n) {
-    int beam_size = n == -1 ? opts_.beam_size : std::min(n, opts_.beam_size);
-    return get_beam_search_result(prefixes_, vocabulary_, beam_size);
-}
-
-vector<std::pair<double, string>> CTCBeamSearch::GetNBestPath() {
-    return GetNBestPath(-1);
-}
-
-string CTCBeamSearch::GetBestPath() {
-    std::vector<std::pair<double, std::string>> result;
-    result = get_beam_search_result(prefixes_, vocabulary_, opts_.beam_size);
-    return result[0].second;
-}
-
-string CTCBeamSearch::GetFinalBestPath() {
-    CalculateApproxScore();
-    LMRescore();
-    return GetBestPath();
-}
-
-void CTCBeamSearch::AdvanceDecoding(const vector<vector<BaseFloat>>& probs) {
-    size_t num_time_steps = probs.size();
-    size_t beam_size = opts_.beam_size;
-    double cutoff_prob = opts_.cutoff_prob;
-    size_t cutoff_top_n = opts_.cutoff_top_n;
-
-    vector<vector<double>> probs_seq(probs.size(),
-                                     vector<double>(probs[0].size(), 0));
-
-    int row = probs.size();
-    int col = probs[0].size();
-    for (int i = 0; i < row; i++) {
-        for (int j = 0; j < col; j++) {
-            probs_seq[i][j] = static_cast<double>(probs[i][j]);
-        }
-    }
-
-    for (size_t time_step = 0; time_step < num_time_steps; time_step++) {
-        const auto& prob = probs_seq[time_step];
-
-        float min_cutoff = -NUM_FLT_INF;
-        bool full_beam = false;
-        if (init_ext_scorer_ != nullptr) {
-            size_t num_prefixes_ = std::min(prefixes_.size(), beam_size);
-            std::sort(prefixes_.begin(),
-                      prefixes_.begin() + num_prefixes_,
-                      prefix_compare);
-
-            if (num_prefixes_ == 0) {
-                continue;
-            }
-            min_cutoff = prefixes_[num_prefixes_ - 1]->score +
-                         std::log(prob[opts_.blank]) -
-                         std::max(0.0, init_ext_scorer_->beta);
-
-            full_beam = (num_prefixes_ == beam_size);
-        }
-
-        vector<std::pair<size_t, float>> log_prob_idx =
-            get_pruned_log_probs(prob, cutoff_prob, cutoff_top_n);
-
-        // loop over chars
-        size_t log_prob_idx_len = log_prob_idx.size();
-        for (size_t index = 0; index < log_prob_idx_len; index++) {
-            SearchOneChar(full_beam, log_prob_idx[index], min_cutoff);
-        }
-
-        prefixes_.clear();
-
-        // update log probs
-        root_->iterate_to_vec(prefixes_);
-        // only preserve top beam_size prefixes_
-        if (prefixes_.size() >= beam_size) {
-            std::nth_element(prefixes_.begin(),
-                             prefixes_.begin() + beam_size,
-                             prefixes_.end(),
-                             prefix_compare);
-            for (size_t i = beam_size; i < prefixes_.size(); ++i) {
-                prefixes_[i]->remove();
-            }
-        }  // end if
-        num_frame_decoded_++;
-    }  // end for probs_seq
-}
-
-int32 CTCBeamSearch::SearchOneChar(
-    const bool& full_beam,
-    const std::pair<size_t, BaseFloat>& log_prob_idx,
-    const BaseFloat& min_cutoff) {
-    size_t beam_size = opts_.beam_size;
-    const auto& c = log_prob_idx.first;
-    const auto& log_prob_c = log_prob_idx.second;
-    size_t prefixes_len = std::min(prefixes_.size(), beam_size);
-
-    for (size_t i = 0; i < prefixes_len; ++i) {
-        auto prefix = prefixes_[i];
-        if (full_beam && log_prob_c + prefix->score < min_cutoff) {
-            break;
-        }
-
-        if (c == opts_.blank) {
-            prefix->log_prob_b_cur =
-                log_sum_exp(prefix->log_prob_b_cur, log_prob_c + prefix->score);
-            continue;
-        }
-
-        // repeated character
-        if (c == prefix->character) {
-            // p_{nb}(l;x_{1:t}) = p(c;x_{t})p(l;x_{1:t-1})
-            prefix->log_prob_nb_cur = log_sum_exp(
-                prefix->log_prob_nb_cur, log_prob_c + prefix->log_prob_nb_prev);
-        }
-
-        // get new prefix
-        auto prefix_new = prefix->get_path_trie(c);
-        if (prefix_new != nullptr) {
-            float log_p = -NUM_FLT_INF;
-            if (c == prefix->character &&
-                prefix->log_prob_b_prev > -NUM_FLT_INF) {
-                // p_{nb}(l^{+};x_{1:t}) = p(c;x_{t})p_{b}(l;x_{1:t-1})
-                log_p = log_prob_c + prefix->log_prob_b_prev;
-            } else if (c != prefix->character) {
-                // p_{nb}(l^{+};x_{1:t}) = p(c;x_{t}) p(l;x_{1:t-1})
-                log_p = log_prob_c + prefix->score;
-            }
-
-            // language model scoring
-            if (init_ext_scorer_ != nullptr &&
-                (c == space_id_ || init_ext_scorer_->is_character_based())) {
-                PathTrie* prefix_to_score = nullptr;
-                // skip scoring the space
-                if (init_ext_scorer_->is_character_based()) {
-                    prefix_to_score = prefix_new;
-                } else {
-                    prefix_to_score = prefix;
-                }
-
-                float score = 0.0;
-                vector<string> ngram;
-                ngram = init_ext_scorer_->make_ngram(prefix_to_score);
-                // lm score: p_{lm}(W)^{\alpha} + \beta
-                score = init_ext_scorer_->get_log_cond_prob(ngram) *
-                        init_ext_scorer_->alpha;
-                log_p += score;
-                log_p += init_ext_scorer_->beta;
-            }
-            // p_{nb}(l;x_{1:t})
-            prefix_new->log_prob_nb_cur =
-                log_sum_exp(prefix_new->log_prob_nb_cur, log_p);
-        }
-    }  // end of loop over prefix
-    return 0;
-}
-
-void CTCBeamSearch::CalculateApproxScore() {
-    size_t beam_size = opts_.beam_size;
-    size_t num_prefixes_ = std::min(prefixes_.size(), beam_size);
-    std::sort(
-        prefixes_.begin(), prefixes_.begin() + num_prefixes_, prefix_compare);
-
-    // compute aproximate ctc score as the return score, without affecting the
-    // return order of decoding result. To delete when decoder gets stable.
-    for (size_t i = 0; i < beam_size && i < prefixes_.size(); ++i) {
-        double approx_ctc = prefixes_[i]->score;
-        if (init_ext_scorer_ != nullptr) {
-            vector<int> output;
-            prefixes_[i]->get_path_vec(output);
-            auto prefix_length = output.size();
-            auto words = init_ext_scorer_->split_labels(output);
-            // remove word insert
-            approx_ctc = approx_ctc - prefix_length * init_ext_scorer_->beta;
-            // remove language model weight:
-            approx_ctc -= (init_ext_scorer_->get_sent_log_prob(words)) *
-                          init_ext_scorer_->alpha;
-        }
-        prefixes_[i]->approx_ctc = approx_ctc;
-    }
-}
-
-void CTCBeamSearch::LMRescore() {
-    size_t beam_size = opts_.beam_size;
-    if (init_ext_scorer_ != nullptr &&
-        !init_ext_scorer_->is_character_based()) {
-        for (size_t i = 0; i < beam_size && i < prefixes_.size(); ++i) {
-            auto prefix = prefixes_[i];
-            if (!prefix->is_empty() && prefix->character != space_id_) {
-                float score = 0.0;
-                vector<string> ngram = init_ext_scorer_->make_ngram(prefix);
-                score = init_ext_scorer_->get_log_cond_prob(ngram) *
-                        init_ext_scorer_->alpha;
-                score += init_ext_scorer_->beta;
-                prefix->score += score;
-            }
-        }
-    }
-}
-
-}  // namespace ppspeech
--- a/speechx/speechx/asr/decoder/ctc_beam_search_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_beam_search_decoder.h
@@ -1,73 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// used by deepspeech2
-
-#pragma once
-
-#include "decoder/ctc_beam_search_opt.h"
-#include "decoder/ctc_decoders/path_trie.h"
-#include "decoder/ctc_decoders/scorer.h"
-#include "decoder/decoder_itf.h"
-
-namespace ppspeech {
-
-class CTCBeamSearch : public DecoderBase {
-  public:
-    explicit CTCBeamSearch(const CTCBeamSearchOptions& opts);
-    ~CTCBeamSearch() {}
-
-    void InitDecoder();
-
-    void Reset();
-
-    void AdvanceDecode(
-        const std::shared_ptr<kaldi::DecodableInterface>& decodable);
-
-    void Decode(std::shared_ptr<kaldi::DecodableInterface> decodable);
-
-    std::string GetBestPath();
-    std::vector<std::pair<double, std::string>> GetNBestPath();
-    std::vector<std::pair<double, std::string>> GetNBestPath(int n);
-    std::string GetFinalBestPath();
-
-    std::string GetPartialResult() {
-        CHECK(false) << "Not implement.";
-        return {};
-    }
-
-    int DecodeLikelihoods(const std::vector<std::vector<BaseFloat>>& probs,
-                          const std::vector<std::string>& nbest_words);
-
-  private:
-    void ResetPrefixes();
-
-    int32 SearchOneChar(const bool& full_beam,
-                        const std::pair<size_t, BaseFloat>& log_prob_idx,
-                        const BaseFloat& min_cutoff);
-    void CalculateApproxScore();
-    void LMRescore();
-    void AdvanceDecoding(const std::vector<std::vector<BaseFloat>>& probs);
-
-    CTCBeamSearchOptions opts_;
-    std::shared_ptr<Scorer> init_ext_scorer_;  // todo separate later
-    std::vector<std::string> vocabulary_;      // todo remove later
-    int space_id_;
-    std::shared_ptr<PathTrie> root_;
-    std::vector<PathTrie*> prefixes_;
-
-    DISALLOW_COPY_AND_ASSIGN(CTCBeamSearch);
-};
-
-}  // namespace ppspeech
--- a/speechx/speechx/asr/decoder/ctc_beam_search_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/ctc_beam_search_decoder_main.cc
@@ -1,167 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// used by deepspeech2
-
-#include "base/flags.h"
-#include "base/log.h"
-#include "decoder/ctc_beam_search_decoder.h"
-#include "frontend/audio/data_cache.h"
-#include "kaldi/util/table-types.h"
-#include "nnet/decodable.h"
-#include "nnet/ds2_nnet.h"
-
-DEFINE_string(feature_rspecifier, "", "test feature rspecifier");
-DEFINE_string(result_wspecifier, "", "test result wspecifier");
-DEFINE_string(model_path, "avg_1.jit.pdmodel", "paddle nnet model");
-DEFINE_string(param_path, "avg_1.jit.pdiparams", "paddle nnet model param");
-DEFINE_string(dict_file, "vocab.txt", "vocabulary of lm");
-DEFINE_string(lm_path, "", "language model");
-DEFINE_int32(receptive_field_length,
-             7,
-             "receptive field of two CNN(kernel=3) downsampling module.");
-DEFINE_int32(subsampling_rate,
-             4,
-             "two CNN(kernel=3) module downsampling rate.");
-DEFINE_string(
-    model_input_names,
-    "audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box",
-    "model input names");
-DEFINE_string(model_output_names,
-              "softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0",
-              "model output names");
-DEFINE_string(model_cache_names,
-              "chunk_state_h_box,chunk_state_c_box",
-              "model cache names");
-DEFINE_string(model_cache_shapes, "5-1-1024,5-1-1024", "model cache shapes");
-DEFINE_int32(nnet_decoder_chunk, 1, "paddle nnet forward chunk");
-
-using kaldi::BaseFloat;
-using kaldi::Matrix;
-using std::vector;
-
-// test ds2 online decoder by feeding speech feature
-int main(int argc, char* argv[]) {
-    gflags::SetUsageMessage("Usage:");
-    gflags::ParseCommandLineFlags(&argc, &argv, false);
-    google::InitGoogleLogging(argv[0]);
-    google::InstallFailureSignalHandler();
-    FLAGS_logtostderr = 1;
-
-    CHECK_NE(FLAGS_result_wspecifier, "");
-    CHECK_NE(FLAGS_feature_rspecifier, "");
-
-    kaldi::SequentialBaseFloatMatrixReader feature_reader(
-        FLAGS_feature_rspecifier);
-    kaldi::TokenWriter result_writer(FLAGS_result_wspecifier);
-    std::string model_path = FLAGS_model_path;
-    std::string model_params = FLAGS_param_path;
-    std::string dict_file = FLAGS_dict_file;
-    std::string lm_path = FLAGS_lm_path;
-    LOG(INFO) << "model path: " << model_path;
-    LOG(INFO) << "model param: " << model_params;
-    LOG(INFO) << "dict path: " << dict_file;
-    LOG(INFO) << "lm path: " << lm_path;
-
-    int32 num_done = 0, num_err = 0;
-
-    ppspeech::CTCBeamSearchOptions opts;
-    opts.dict_file = dict_file;
-    opts.lm_path = lm_path;
-    ppspeech::CTCBeamSearch decoder(opts);
-
-    ppspeech::ModelOptions model_opts = ppspeech::ModelOptions::InitFromFlags();
-
-    std::shared_ptr<ppspeech::PaddleNnet> nnet(
-        new ppspeech::PaddleNnet(model_opts));
-    std::shared_ptr<ppspeech::DataCache> raw_data(new ppspeech::DataCache());
-    std::shared_ptr<ppspeech::Decodable> decodable(
-        new ppspeech::Decodable(nnet, raw_data));
-
-    int32 chunk_size = FLAGS_receptive_field_length +
-                       (FLAGS_nnet_decoder_chunk - 1) * FLAGS_subsampling_rate;
-    int32 chunk_stride = FLAGS_subsampling_rate * FLAGS_nnet_decoder_chunk;
-    int32 receptive_field_length = FLAGS_receptive_field_length;
-    LOG(INFO) << "chunk size (frame): " << chunk_size;
-    LOG(INFO) << "chunk stride (frame): " << chunk_stride;
-    LOG(INFO) << "receptive field (frame): " << receptive_field_length;
-    decoder.InitDecoder();
-
-    kaldi::Timer timer;
-    for (; !feature_reader.Done(); feature_reader.Next()) {
-        string utt = feature_reader.Key();
-        kaldi::Matrix<BaseFloat> feature = feature_reader.Value();
-        raw_data->SetDim(feature.NumCols());
-        LOG(INFO) << "process utt: " << utt;
-        LOG(INFO) << "rows: " << feature.NumRows();
-        LOG(INFO) << "cols: " << feature.NumCols();
-
-        int32 row_idx = 0;
-        int32 padding_len = 0;
-        int32 ori_feature_len = feature.NumRows();
-        if ((feature.NumRows() - chunk_size) % chunk_stride != 0) {
-            padding_len =
-                chunk_stride - (feature.NumRows() - chunk_size) % chunk_stride;
-            feature.Resize(feature.NumRows() + padding_len,
-                           feature.NumCols(),
-                           kaldi::kCopyData);
-        }
-        int32 num_chunks = (feature.NumRows() - chunk_size) / chunk_stride + 1;
-        for (int chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) {
-            kaldi::Vector<kaldi::BaseFloat> feature_chunk(chunk_size *
-                                                          feature.NumCols());
-            int32 feature_chunk_size = 0;
-            if (ori_feature_len > chunk_idx * chunk_stride) {
-                feature_chunk_size = std::min(
-                    ori_feature_len - chunk_idx * chunk_stride, chunk_size);
-            }
-            if (feature_chunk_size < receptive_field_length) break;
-
-            int32 start = chunk_idx * chunk_stride;
-
-            for (int row_id = 0; row_id < chunk_size; ++row_id) {
-                kaldi::SubVector<kaldi::BaseFloat> tmp(feature, start);
-                kaldi::SubVector<kaldi::BaseFloat> f_chunk_tmp(
-                    feature_chunk.Data() + row_id * feature.NumCols(),
-                    feature.NumCols());
-                f_chunk_tmp.CopyFromVec(tmp);
-                ++start;
-            }
-            raw_data->Accept(feature_chunk);
-            if (chunk_idx == num_chunks - 1) {
-                raw_data->SetFinished();
-            }
-            decoder.AdvanceDecode(decodable);
-        }
-        std::string result;
-        result = decoder.GetFinalBestPath();
-        decodable->Reset();
-        decoder.Reset();
-        if (result.empty()) {
-            // the TokenWriter can not write empty string.
-            ++num_err;
-            KALDI_LOG << " the result of " << utt << " is empty";
-            continue;
-        }
-        KALDI_LOG << " the result of " << utt << " is " << result;
-        result_writer.Write(utt, result);
-        ++num_done;
-    }
-
-    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
-              << " with errors.";
-    double elapsed = timer.Elapsed();
-    KALDI_LOG << " cost:" << elapsed << " s";
-    return (num_done != 0 ? 0 : 1);
-}
--- a/speechx/speechx/asr/decoder/ctc_decoders/.gitignore
+++ b/speechx/speechx/asr/decoder/ctc_decoders/.gitignore
@@ -1,9 +0,0 @@
-ThreadPool/
-build/
-dist/
-kenlm/
-openfst-1.6.3/
-openfst-1.6.3.tar.gz
-swig_decoders.egg-info/
-decoders_wrap.cxx
-swig_decoders.py
--- a/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp
@ -1,607 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "ctc_beam_search_decoder.h"
-
-#include <algorithm>
-#include <cmath>
-#include <iostream>
-#include <limits>
-#include <map>
-#include <utility>
-
-#include "ThreadPool.h"
-#include "fst/fstlib.h"
-
-#include "decoder_utils.h"
-#include "path_trie.h"
-
-using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;  // matcher over the dictionary FST
-
-
-// One-shot CTC prefix beam search over a whole utterance.
-//
-// Built on top of the chunk API so that the search loop and the final
-// rescoring exist in exactly one place: a throw-away trie root is initialised
-// the same way CtcBeamSearchDecoderStorage does it, the complete probs_seq is
-// fed as a single chunk, and get_decode_result() ranks the hypotheses.
-//
-// Parameters:
-//     probs_seq: one probability vector per time step, each of size
-//                vocabulary.size().
-//     vocabulary: the output tokens.
-//     beam_size: width of the beam.
-//     cutoff_prob / cutoff_top_n: pruning settings forwarded to
-//                get_pruned_log_probs().
-//     ext_scorer: optional external scorer, may be nullptr.
-//     blank_id: vocabulary index treated as the CTC blank.
-// Return:
-//     The result of get_beam_search_result() for the final beam.
-std::vector<std::pair<double, std::string>> ctc_beam_search_decoding(
-    const std::vector<std::vector<double>> &probs_seq,
-    const std::vector<std::string> &vocabulary,
-    size_t beam_size,
-    double cutoff_prob,
-    size_t cutoff_top_n,
-    Scorer *ext_scorer,
-    size_t blank_id) {
-    // init prefixes' root: probability 1.0, i.e. 0.0 in log scale
-    PathTrie root;
-    root.score = root.log_prob_b_prev = 0.0;
-    std::vector<PathTrie *> prefixes;
-    prefixes.push_back(&root);
-
-    // attach dictionary FST and matcher when the scorer is word based
-    ctc_beam_search_decode_chunk_begin(&root, ext_scorer);
-
-    // dimension check + prefix search over time, whole input as one chunk
-    ctc_beam_search_decode_chunk(&root,
-                                 prefixes,
-                                 probs_seq,
-                                 vocabulary,
-                                 beam_size,
-                                 cutoff_prob,
-                                 cutoff_top_n,
-                                 ext_scorer,
-                                 blank_id);
-
-    // get_decode_result() restores the prefix scores after building the
-    // result; that is harmless here because the trie dies with this call.
-    return get_decode_result(prefixes, vocabulary, beam_size, ext_scorer);
-}
-
-
-// Decodes every utterance of a batch with ctc_beam_search_decoding(), one
-// thread-pool task per utterance.
-//
-// Parameters:
-//     probs_split: one probability matrix per utterance.
-//     num_processes: number of pool threads, must be greater than 0.
-//     all other parameters are forwarded unchanged to
-//     ctc_beam_search_decoding().
-// Return:
-//     One decoding result per utterance, in input order.
-std::vector<std::vector<std::pair<double, std::string>>>
-ctc_beam_search_decoding_batch(
-    const std::vector<std::vector<std::vector<double>>> &probs_split,
-    const std::vector<std::string> &vocabulary,
-    size_t beam_size,
-    size_t num_processes,
-    double cutoff_prob,
-    size_t cutoff_top_n,
-    Scorer *ext_scorer,
-    size_t blank_id) {
-    VALID_CHECK_GT(num_processes, 0, "num_processes must be greater than 0!");
-    // thread pool
-    ThreadPool pool(num_processes);
-    // number of samples
-    const size_t batch_size = probs_split.size();
-
-    // enqueue the tasks of decoding
-    std::vector<std::future<std::vector<std::pair<double, std::string>>>> res;
-    res.reserve(batch_size);
-    for (size_t i = 0; i < batch_size; ++i) {
-        // The inputs are caller-owned and every future is awaited before this
-        // function returns, so they are handed over by reference instead of
-        // copying the whole matrix and vocabulary into each task.
-        res.emplace_back(pool.enqueue(ctc_beam_search_decoding,
-                                      std::cref(probs_split[i]),
-                                      std::cref(vocabulary),
-                                      beam_size,
-                                      cutoff_prob,
-                                      cutoff_top_n,
-                                      ext_scorer,
-                                      blank_id));
-    }
-
-    // get decoding results
-    std::vector<std::vector<std::pair<double, std::string>>> batch_results;
-    batch_results.reserve(batch_size);
-    for (size_t i = 0; i < batch_size; ++i) {
-        batch_results.emplace_back(res[i].get());
-    }
-    return batch_results;
-}
-
-// Prepares a trie root for decoding with a word-based external scorer: the
-// root receives its own copy of the scorer's dictionary FST plus a matcher
-// built on that copy. Without a scorer, or with a character-based one, the
-// root is left untouched.
-void ctc_beam_search_decode_chunk_begin(PathTrie *root, Scorer *ext_scorer) {
-    if (ext_scorer == nullptr || ext_scorer->is_character_based()) {
-        return;
-    }
-
-    fst::StdVectorFst *scorer_dict =
-        static_cast<fst::StdVectorFst *>(ext_scorer->dictionary);
-    fst::StdVectorFst *dict_copy = scorer_dict->Copy(true);
-    root->set_dictionary(dict_copy);
-
-    auto dict_matcher =
-        std::make_shared<FSTMATCH>(*dict_copy, fst::MATCH_INPUT);
-    root->set_matcher(dict_matcher);
-}
-
-// Advances the CTC prefix beam search by one chunk of frames.
-//
-// root / prefixes carry the search state between calls: `prefixes` is read as
-// the current beam and is rebuilt from the trie under `root` after every time
-// step. After each step the best beam_size entries (by prefix_compare) are
-// moved to the front and remove() is called on the rest.
-//
-// probs_seq holds one probability vector per frame; each must have exactly
-// vocabulary.size() entries. cutoff_prob / cutoff_top_n are forwarded to
-// get_pruned_log_probs(). ext_scorer may be nullptr. blank_id is the
-// vocabulary index treated as the CTC blank.
-void ctc_beam_search_decode_chunk(
-    PathTrie *root,
-    std::vector<PathTrie *> &prefixes,
-    const std::vector<std::vector<double>> &probs_seq,
-    const std::vector<std::string> &vocabulary,
-    size_t beam_size,
-    double cutoff_prob,
-    size_t cutoff_top_n,
-    Scorer *ext_scorer,
-    size_t blank_id) {
-    // dimension check
-    size_t num_time_steps = probs_seq.size();
-    for (size_t i = 0; i < num_time_steps; ++i) {
-        VALID_CHECK_EQ(probs_seq[i].size(),
-                       // vocabulary.size() + 1,
-                       vocabulary.size(),
-                       "The shape of probs_seq does not match with "
-                       "the shape of the vocabulary");
-    }
-    if (ext_scorer != nullptr) {
-        // prob[blank_id] is read below to derive the pruning threshold, so an
-        // out-of-range blank_id must be rejected instead of read blindly.
-        VALID_CHECK_GT(vocabulary.size(),
-                       blank_id,
-                       "blank_id must be a valid index into the vocabulary");
-    }
-
-    // assign space id
-    auto it = std::find(vocabulary.begin(), vocabulary.end(), kSPACE);
-    int space_id = it - vocabulary.begin();
-    // if no space in vocabulary
-    if ((size_t)space_id >= vocabulary.size()) {
-        space_id = -2;
-    }
-
-    // prefix search over time
-    for (size_t time_step = 0; time_step < num_time_steps; ++time_step) {
-        auto &prob = probs_seq[time_step];
-
-        float min_cutoff = -NUM_FLT_INF;
-        bool full_beam = false;
-        size_t num_prefixes = std::min(prefixes.size(), beam_size);
-        // With no live prefix (empty `prefixes` or beam_size == 0) there is
-        // nothing to prune against, and indexing num_prefixes - 1 would wrap
-        // around. The prefix loop below does not run in that case either.
-        if (ext_scorer != nullptr && num_prefixes > 0) {
-            std::sort(prefixes.begin(),
-                      prefixes.begin() + num_prefixes,
-                      prefix_compare);
-            // score of the last prefix inside the beam extended by a blank,
-            // lowered by the word insertion bonus when that bonus is positive
-            min_cutoff = prefixes[num_prefixes - 1]->score +
-                         std::log(prob[blank_id]) -
-                         std::max(0.0, ext_scorer->beta);
-            full_beam = (num_prefixes == beam_size);
-        }
-
-        std::vector<std::pair<size_t, float>> log_prob_idx =
-            get_pruned_log_probs(prob, cutoff_prob, cutoff_top_n);
-        // loop over chars
-        for (size_t index = 0; index < log_prob_idx.size(); index++) {
-            auto c = log_prob_idx[index].first;
-            auto log_prob_c = log_prob_idx[index].second;
-
-            for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) {
-                auto prefix = prefixes[i];
-                // stop at the first prefix that falls below the threshold
-                if (full_beam && log_prob_c + prefix->score < min_cutoff) {
-                    break;
-                }
-                // blank
-                if (c == blank_id) {
-                    prefix->log_prob_b_cur = log_sum_exp(
-                        prefix->log_prob_b_cur, log_prob_c + prefix->score);
-                    continue;
-                }
-                // repeated character
-                if (c == prefix->character) {
-                    prefix->log_prob_nb_cur =
-                        log_sum_exp(prefix->log_prob_nb_cur,
-                                    log_prob_c + prefix->log_prob_nb_prev);
-                }
-                // get new prefix
-                auto prefix_new = prefix->get_path_trie(c);
-
-                if (prefix_new != nullptr) {
-                    float log_p = -NUM_FLT_INF;
-
-                    // a repeated character only extends the prefix when the
-                    // previous step contributed blank probability
-                    if (c == prefix->character &&
-                        prefix->log_prob_b_prev > -NUM_FLT_INF) {
-                        log_p = log_prob_c + prefix->log_prob_b_prev;
-                    } else if (c != prefix->character) {
-                        log_p = log_prob_c + prefix->score;
-                    }
-
-                    // language model scoring
-                    if (ext_scorer != nullptr &&
-                        (c == space_id || ext_scorer->is_character_based())) {
-                        PathTrie *prefix_to_score = nullptr;
-                        // skip scoring the space
-                        if (ext_scorer->is_character_based()) {
-                            prefix_to_score = prefix_new;
-                        } else {
-                            prefix_to_score = prefix;
-                        }
-
-                        float score = 0.0;
-                        std::vector<std::string> ngram;
-                        ngram = ext_scorer->make_ngram(prefix_to_score);
-                        score = ext_scorer->get_log_cond_prob(ngram) *
-                                ext_scorer->alpha;
-                        log_p += score;
-                        log_p += ext_scorer->beta;
-                    }
-                    prefix_new->log_prob_nb_cur =
-                        log_sum_exp(prefix_new->log_prob_nb_cur, log_p);
-                }
-            }  // end of loop over prefix
-        }      // end of loop over vocabulary
-
-        prefixes.clear();
-        // update log probs
-        root->iterate_to_vec(prefixes);
-
-        // only preserve top beam_size prefixes
-        if (prefixes.size() >= beam_size) {
-            std::nth_element(prefixes.begin(),
-                             prefixes.begin() + beam_size,
-                             prefixes.end(),
-                             prefix_compare);
-            for (size_t i = beam_size; i < prefixes.size(); ++i) {
-                prefixes[i]->remove();
-            }
-        }
-    }  // end of loop over time
-}
-
-
-// Ranks the current beam and returns the result of get_beam_search_result().
-//
-// For a word-based scorer the last, unfinished word of every prefix in the
-// beam is scored before ranking and that score is taken back afterwards, so
-// that decoding can continue with the next chunk.
-std::vector<std::pair<double, std::string>> get_decode_result(
-    std::vector<PathTrie *> &prefixes,
-    const std::vector<std::string> &vocabulary,
-    size_t beam_size,
-    Scorer *ext_scorer) {
-    // locate the space token; -2 stands for "no space in vocabulary"
-    auto space_pos = std::find(vocabulary.begin(), vocabulary.end(), kSPACE);
-    int space_id = space_pos - vocabulary.begin();
-    if ((size_t)space_id >= vocabulary.size()) {
-        space_id = -2;
-    }
-
-    // Adds (apply == true) or takes back (apply == false) the score of the
-    // last word of each prefix in the beam that doesn't end with space.
-    auto adjust_last_word = [&](bool apply) {
-        if (ext_scorer == nullptr || ext_scorer->is_character_based()) {
-            return;
-        }
-        const size_t limit = std::min(beam_size, prefixes.size());
-        for (size_t k = 0; k < limit; ++k) {
-            PathTrie *prefix = prefixes[k];
-            if (prefix->is_empty() || prefix->character == space_id) {
-                continue;
-            }
-            std::vector<std::string> ngram = ext_scorer->make_ngram(prefix);
-            float score =
-                ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha;
-            score += ext_scorer->beta;
-            if (apply) {
-                prefix->score += score;
-            } else {
-                prefix->score -= score;
-            }
-        }
-    };
-
-    adjust_last_word(true);
-
-    const size_t num_prefixes = std::min(prefixes.size(), beam_size);
-    std::sort(
-        prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare);
-
-    // compute approximate ctc score as the return score, without affecting
-    // the return order of decoding result. To delete when decoder gets stable.
-    for (size_t k = 0; k < num_prefixes; ++k) {
-        PathTrie *prefix = prefixes[k];
-        double approx_ctc = prefix->score;
-        if (ext_scorer != nullptr) {
-            std::vector<int> output;
-            prefix->get_path_vec(output);
-            auto prefix_length = output.size();
-            auto words = ext_scorer->split_labels(output);
-            // remove word insert
-            approx_ctc = approx_ctc - prefix_length * ext_scorer->beta;
-            // remove language model weight:
-            approx_ctc -=
-                (ext_scorer->get_sent_log_prob(words)) * ext_scorer->alpha;
-        }
-        prefix->approx_ctc = approx_ctc;
-    }
-
-    std::vector<std::pair<double, std::string>> res =
-        get_beam_search_result(prefixes, vocabulary, beam_size);
-
-    // pay back the last word score (for decoding by chunk)
-    adjust_last_word(false);
-    return res;
-}
-
-
-// Destroys the storage object owned by `storage` and leaves it empty.
-void free_storage(std::unique_ptr<CtcBeamSearchDecoderStorage> &storage) {
-    storage.reset();
-}
-
-
-// Nothing to release by hand: the members clean up after themselves.
-CtcBeamSearchDecoderBatch::~CtcBeamSearchDecoderBatch() = default;
-
-// Creates a decoder for `batch_size` parallel streams.
-//
-// One CtcBeamSearchDecoderStorage is allocated per stream and its root is
-// prepared with ctc_beam_search_decode_chunk_begin(). beam_size and
-// num_processes must both be greater than 0. ext_scorer may be nullptr and is
-// not owned by the decoder.
-CtcBeamSearchDecoderBatch::CtcBeamSearchDecoderBatch(
-    const std::vector<std::string> &vocabulary,
-    size_t batch_size,
-    size_t beam_size,
-    size_t num_processes,
-    double cutoff_prob,
-    size_t cutoff_top_n,
-    Scorer *ext_scorer,
-    size_t blank_id)
-    : vocabulary(vocabulary),
-      batch_size(batch_size),
-      beam_size(beam_size),
-      num_processes(num_processes),
-      cutoff_prob(cutoff_prob),
-      cutoff_top_n(cutoff_top_n),
-      ext_scorer(ext_scorer),
-      blank_id(blank_id) {
-    VALID_CHECK_GT(this->beam_size, 0, "beam_size must be greater than 0!");
-    VALID_CHECK_GT(
-        this->num_processes, 0, "num_processes must be greater than 0!");
-    this->decoder_storage_vector.reserve(batch_size);
-    for (size_t i = 0; i < batch_size; i++) {
-        this->decoder_storage_vector.push_back(
-            std::unique_ptr<CtcBeamSearchDecoderStorage>(
-                new CtcBeamSearchDecoderStorage()));
-        ctc_beam_search_decode_chunk_begin(
-            this->decoder_storage_vector[i]->root, ext_scorer);
-    }
-}
-
-/**
- * Feed the next chunk of probabilities to the streams that have data.
- *
- * Input
- * probs_split: shape [B, T, D]
- * has_value: one flag per stream; only streams whose flag equals "true" are
- *            advanced with ctc_beam_search_decode_chunk(), the others keep
- *            their state.
- */
-void CtcBeamSearchDecoderBatch::next(
-    const std::vector<std::vector<std::vector<double>>> &probs_split,
-    const std::vector<std::string> &has_value) {
-    VALID_CHECK_GT(num_processes, 0, "num_processes must be greater than 0!");
-    // number of samples
-    size_t probs_num = probs_split.size();
-    VALID_CHECK_EQ(this->batch_size,
-                   probs_num,
-                   "The batch size of the current input data should be same "
-                   "with the input data before");
-    // has_value is indexed per sample below
-    VALID_CHECK_EQ(has_value.size(),
-                   this->batch_size,
-                   "has_value must hold exactly one flag per sample");
-
-    size_t num_has_value = 0;
-    for (size_t i = 0; i < has_value.size(); i++) {
-        if (has_value[i] == "true") num_has_value += 1;
-    }
-    // nothing to decode; also avoids creating a pool without any worker
-    if (num_has_value == 0) {
-        return;
-    }
-
-    // thread pool
-    ThreadPool pool(std::min(num_processes, num_has_value));
-
-    // enqueue the tasks of decoding
-    std::vector<std::future<void>> res;
-    res.reserve(num_has_value);
-    for (size_t i = 0; i < batch_size; ++i) {
-        if (has_value[i] == "true") {
-            // every future is awaited below, so the inputs can be shared by
-            // reference instead of being copied into each task
-            res.emplace_back(pool.enqueue(
-                ctc_beam_search_decode_chunk,
-                this->decoder_storage_vector[i]->root,
-                std::ref(this->decoder_storage_vector[i]->prefixes),
-                std::cref(probs_split[i]),
-                std::cref(this->vocabulary),
-                this->beam_size,
-                this->cutoff_prob,
-                this->cutoff_top_n,
-                this->ext_scorer,
-                this->blank_id));
-        }
-    }
-
-    // Wait for exactly the tasks that were enqueued: `res` holds one future
-    // per "true" flag and is shorter than batch_size when streams are skipped.
-    for (size_t i = 0; i < res.size(); ++i) {
-        res[i].get();
-    }
-}
-
-/**
- * Collect the current decoding result of every stream by running
- * get_decode_result() on its prefixes, one pool task per stream.
- *
- * Return
- * batch_result: shape[B, beam_size,(-approx_ctc score, string)]
- */
-std::vector<std::vector<std::pair<double, std::string>>>
-CtcBeamSearchDecoderBatch::decode() {
-    VALID_CHECK_GT(
-        this->num_processes, 0, "num_processes must be greater than 0!");
-    // thread pool
-    ThreadPool pool(this->num_processes);
-    // enqueue the tasks of decoding
-    std::vector<std::future<std::vector<std::pair<double, std::string>>>> res;
-    res.reserve(this->batch_size);
-    for (size_t i = 0; i < this->batch_size; ++i) {
-        res.emplace_back(
-            pool.enqueue(get_decode_result,
-                         std::ref(this->decoder_storage_vector[i]->prefixes),
-                         this->vocabulary,
-                         this->beam_size,
-                         this->ext_scorer));
-    }
-    // get decoding results
-    std::vector<std::vector<std::pair<double, std::string>>> batch_results;
-    batch_results.reserve(this->batch_size);
-    for (size_t i = 0; i < this->batch_size; ++i) {
-        batch_results.emplace_back(res[i].get());
-    }
-    return batch_results;
-}
-
-
-/**
- * reset the state of ctcBeamSearchDecoderBatch
- *
- * Takes over the new settings, frees the storage of all streams (on a thread
- * pool) and allocates fresh storage for `batch_size` streams. beam_size and
- * num_processes must be greater than 0; they are checked before any member
- * is modified so a rejected call leaves the decoder unchanged.
- */
-void CtcBeamSearchDecoderBatch::reset_state(size_t batch_size,
-                                            size_t beam_size,
-                                            size_t num_processes,
-                                            double cutoff_prob,
-                                            size_t cutoff_top_n) {
-    VALID_CHECK_GT(beam_size, 0, "beam_size must be greater than 0!");
-    VALID_CHECK_GT(num_processes, 0, "num_processes must be greater than 0!");
-
-    this->batch_size = batch_size;
-    this->beam_size = beam_size;
-    this->num_processes = num_processes;
-    this->cutoff_prob = cutoff_prob;
-    this->cutoff_top_n = cutoff_top_n;
-
-    // thread pool
-    ThreadPool pool(this->num_processes);
-    // enqueue the tasks of freeing the old storage
-    std::vector<std::future<void>> res;
-    size_t storage_size = decoder_storage_vector.size();
-    res.reserve(storage_size);
-    for (size_t i = 0; i < storage_size; i++) {
-        res.emplace_back(pool.enqueue(
-            free_storage, std::ref(this->decoder_storage_vector[i])));
-    }
-    for (size_t i = 0; i < storage_size; ++i) {
-        res[i].get();
-    }
-    // drop the emptied slots together with the vector's capacity
-    std::vector<std::unique_ptr<CtcBeamSearchDecoderStorage>>().swap(
-        decoder_storage_vector);
-    this->decoder_storage_vector.reserve(this->batch_size);
-    for (size_t i = 0; i < this->batch_size; i++) {
-        this->decoder_storage_vector.push_back(
-            std::unique_ptr<CtcBeamSearchDecoderStorage>(
-                new CtcBeamSearchDecoderStorage()));
-        ctc_beam_search_decode_chunk_begin(
-            this->decoder_storage_vector[i]->root, this->ext_scorer);
-    }
-}
--- a/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h
@ -1,175 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef CTC_BEAM_SEARCH_DECODER_H_
-#define CTC_BEAM_SEARCH_DECODER_H_
-
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "scorer.h"
-
-/* CTC Beam Search Decoder
-
- * Parameters:
- *     probs_seq: 2-D vector that each element is a vector of probabilities
- *               over vocabulary of one time step. Every element must have
- *               exactly vocabulary.size() entries.
- *     vocabulary: A vector of vocabulary.
- *     beam_size: The width of beam search.
- *     cutoff_prob: Cutoff probability for pruning.
- *     cutoff_top_n: Cutoff number for pruning.
- *     ext_scorer: External scorer to evaluate a prefix, which consists of
- *                 n-gram language model scoring and word insertion term.
- *                 Default null, decoding the input sample without scorer.
- *     blank_id: Index in the vocabulary that is treated as the CTC blank.
- *               Default 0.
- * Return:
- *     A vector that each element is a pair of score and decoding result,
- *     in descending order.
-*/
-std::vector<std::pair<double, std::string>> ctc_beam_search_decoding(
-    const std::vector<std::vector<double>> &probs_seq,
-    const std::vector<std::string> &vocabulary,
-    size_t beam_size,
-    double cutoff_prob = 1.0,
-    size_t cutoff_top_n = 40,
-    Scorer *ext_scorer = nullptr,
-    size_t blank_id = 0);
-
-
-/* CTC Beam Search Decoder for batch data
-
- * Parameters:
- *     probs_split: 3-D vector that each element is a 2-D vector that can be
- *                  used by ctc_beam_search_decoding().
- *     vocabulary: A vector of vocabulary.
- *     beam_size: The width of beam search.
- *     num_processes: Number of threads for beam search. Must be greater
- *                    than 0.
- *     cutoff_prob: Cutoff probability for pruning.
- *     cutoff_top_n: Cutoff number for pruning.
- *     ext_scorer: External scorer to evaluate a prefix, which consists of
- *                 n-gram language model scoring and word insertion term.
- *                 Default null, decoding the input sample without scorer.
- *     blank_id: Index in the vocabulary that is treated as the CTC blank.
- *               Default 0.
- * Return:
- *     A 2-D vector that each element is a vector of beam search decoding
- *     result for one audio sample.
-*/
-std::vector<std::vector<std::pair<double, std::string>>>
-ctc_beam_search_decoding_batch(
-    const std::vector<std::vector<std::vector<double>>> &probs_split,
-    const std::vector<std::string> &vocabulary,
-    size_t beam_size,
-    size_t num_processes,
-    double cutoff_prob = 1.0,
-    size_t cutoff_top_n = 40,
-    Scorer *ext_scorer = nullptr,
-    size_t blank_id = 0);
-
-/**
- * Store the root and prefixes for decoder
- *
- * The storage owns the trie root: it is allocated in the constructor and
- * deleted in the destructor. Copying is disabled because a copy would share
- * the same raw pointer and delete it a second time.
- */
-
-class CtcBeamSearchDecoderStorage {
-  public:
-    PathTrie *root = nullptr;
-    std::vector<PathTrie *> prefixes;
-
-    CtcBeamSearchDecoderStorage() {
-        // init prefixes' root
-        this->root = new PathTrie();
-        this->root->log_prob_b_prev = 0.0;
-        // The score of root is in log scale. Since the prob=1.0, the prob
-        // score in log scale is 0.0
-        this->root->score = root->log_prob_b_prev;
-        // the search starts from the root as the only prefix
-        this->prefixes.push_back(root);
-    }
-
-    CtcBeamSearchDecoderStorage(const CtcBeamSearchDecoderStorage &) = delete;
-    CtcBeamSearchDecoderStorage &operator=(
-        const CtcBeamSearchDecoderStorage &) = delete;
-
-    ~CtcBeamSearchDecoderStorage() {
-        // deleting a null pointer is a no-op, no check needed
-        delete root;
-        root = nullptr;
-    }
-};
-
-/**
- * The ctc beam search decoder, support batchsize >= 1
- *
- * Keeps one CtcBeamSearchDecoderStorage (trie root + prefixes) per sample of
- * the batch so decoding can proceed chunk by chunk:
- *   - next() advances the samples whose has_value entry equals "true" by one
- *     chunk of probabilities,
- *   - decode() runs get_decode_result() for every sample,
- *   - reset_state() takes new settings, frees the storage and allocates
- *     fresh storage for the new batch size.
- * The constructor and reset_state() require beam_size > 0 and
- * num_processes > 0.
- */
-class CtcBeamSearchDecoderBatch {
-  public:
-    CtcBeamSearchDecoderBatch(const std::vector<std::string> &vocabulary,
-                              size_t batch_size,
-                              size_t beam_size,
-                              size_t num_processes,
-                              double cutoff_prob,
-                              size_t cutoff_top_n,
-                              Scorer *ext_scorer,
-                              size_t blank_id);
-
-    ~CtcBeamSearchDecoderBatch();
-    void next(const std::vector<std::vector<std::vector<double>>> &probs_split,
-              const std::vector<std::string> &has_value);
-
-    std::vector<std::vector<std::pair<double, std::string>>> decode();
-
-    void reset_state(size_t batch_size,
-                     size_t beam_size,
-                     size_t num_processes,
-                     double cutoff_prob,
-                     size_t cutoff_top_n);
-
-  private:
-    std::vector<std::string> vocabulary;  // copy of the output tokens
-    size_t batch_size;                    // number of storage entries
-    size_t beam_size;                     // width of the beam
-    size_t num_processes;                 // thread pool size
-    double cutoff_prob;                   // pruning: probability cutoff
-    size_t cutoff_top_n;                  // pruning: top-n cutoff
-    Scorer *ext_scorer;                   // may be nullptr; not deleted here
-    size_t blank_id;                      // vocabulary index of the blank
-    std::vector<std::unique_ptr<CtcBeamSearchDecoderStorage>>
-        decoder_storage_vector;  // one storage per sample
-};
-
-/**
- * function for chunk decoding
- *
- * Runs the prefix beam search over the frames in probs_seq, continuing from
- * the state held in `root` and `prefixes`. `prefixes` is rebuilt from the
- * trie under `root` after every frame. Each element of probs_seq must have
- * vocabulary.size() entries. ext_scorer may be nullptr.
- */
-void ctc_beam_search_decode_chunk(
-    PathTrie *root,
-    std::vector<PathTrie *> &prefixes,
-    const std::vector<std::vector<double>> &probs_seq,
-    const std::vector<std::string> &vocabulary,
-    size_t beam_size,
-    double cutoff_prob,
-    size_t cutoff_top_n,
-    Scorer *ext_scorer,
-    size_t blank_id);
-/**
- * Rank the first min(prefixes.size(), beam_size) prefixes and return the
- * output of get_beam_search_result() for them. With a word-based ext_scorer
- * the last word of each of these prefixes is scored before ranking and the
- * score is subtracted again before returning, so chunk decoding can go on.
- */
-std::vector<std::pair<double, std::string>> get_decode_result(
-    std::vector<PathTrie *> &prefixes,
-    const std::vector<std::string> &vocabulary,
-    size_t beam_size,
-    Scorer *ext_scorer);
-
-/**
- * free the CtcBeamSearchDecoderStorage
- *
- * Sets `storage` to nullptr, which destroys the object it owned.
- */
-void free_storage(std::unique_ptr<CtcBeamSearchDecoderStorage> &storage);
-
-/**
- * initialize the root
- *
- * When ext_scorer is non-null and not character based, `root` receives a
- * copy of the scorer's dictionary FST and a matcher built on that copy.
- * Otherwise `root` is left unchanged.
- */
-void ctc_beam_search_decode_chunk_begin(PathTrie *root, Scorer *ext_scorer);
-
-#endif  // CTC_BEAM_SEARCH_DECODER_H_
--- a/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp
@ -1,61 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "ctc_greedy_decoder.h"
-#include "decoder_utils.h"
-
-// Best-path CTC decoding: takes the most probable index of every frame,
-// merges consecutive repeats, drops blank_id and concatenates the remaining
-// tokens (kSPACE is emitted as tSPACE).
-std::string ctc_greedy_decoding(
-    const std::vector<std::vector<double>> &probs_seq,
-    const std::vector<std::string> &vocabulary,
-    size_t blank_id) {
-    // every frame must provide one probability per vocabulary entry
-    const size_t frame_count = probs_seq.size();
-    for (size_t t = 0; t < frame_count; ++t) {
-        VALID_CHECK_EQ(probs_seq[t].size(),
-                       vocabulary.size(),
-                       "The shape of probs_seq does not match with "
-                       "the shape of the vocabulary");
-    }
-
-    std::string best_path_result;
-    size_t last_idx = 0;
-    for (size_t t = 0; t < frame_count; ++t) {
-        const std::vector<double> &frame = probs_seq[t];
-
-        // arg-max of the frame; ties and frames without a positive entry
-        // resolve to the lowest index
-        size_t best_idx = 0;
-        double best_prob = 0.0;
-        for (size_t k = 0; k < frame.size(); ++k) {
-            if (frame[k] > best_prob) {
-                best_idx = k;
-                best_prob = frame[k];
-            }
-        }
-
-        // deduplicate: a frame repeating the previous index adds nothing
-        const bool repeated = (t > 0) && (best_idx == last_idx);
-        last_idx = best_idx;
-        if (repeated || best_idx == blank_id) {
-            continue;
-        }
-
-        std::string token = vocabulary[best_idx];
-        best_path_result += (token == kSPACE) ? tSPACE : token;
-    }
-    return best_path_result;
-}
--- a/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h
@ -1,35 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef CTC_GREEDY_DECODER_H
-#define CTC_GREEDY_DECODER_H
-
-#include <string>
-#include <vector>
-
-/* CTC Greedy (Best Path) Decoder
- *
- * Parameters:
- *     probs_seq: 2-D vector that each element is a vector of probabilities
- *               over vocabulary of one time step.
- *     vocabulary: A vector of vocabulary.
- *     blank_id: Index of the label that is skipped (the blank) when the
- *               result string is assembled.
- * Return:
- *     The decoding result in string
- */
-std::string ctc_greedy_decoding(
-    const std::vector<std::vector<double>>& probs_seq,
-    const std::vector<std::string>& vocabulary,
-    size_t blank_id);
-
-#endif  // CTC_GREEDY_DECODER_H
--- a/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp
@ -1,193 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "decoder_utils.h"
-
-#include <algorithm>
-#include <cmath>
-#include <limits>
-
-// Prune one time step of probabilities and return the survivors as
-// (vocabulary index, log probability) pairs, highest probability first
-// whenever pruning is active.
-//
-//   prob_step:    probabilities over the vocabulary for one time step.
-//   cutoff_prob:  when < 1.0, keep the smallest leading set whose cumulative
-//                 probability reaches this value.
-//   cutoff_top_n: upper bound on the number of entries kept.
-//
-// NUM_FLT_MIN is added before taking the log so that a zero probability does
-// not produce -inf.
-std::vector<std::pair<size_t, float>> get_pruned_log_probs(
-    const std::vector<double> &prob_step,
-    double cutoff_prob,
-    size_t cutoff_top_n) {
-    std::vector<std::pair<int, double>> prob_idx;
-    prob_idx.reserve(prob_step.size());
-    for (size_t i = 0; i < prob_step.size(); ++i) {
-        prob_idx.emplace_back(static_cast<int>(i), prob_step[i]);
-    }
-
-    // pruning of vocabulary
-    size_t cutoff_len = prob_step.size();
-    if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) {
-        std::sort(prob_idx.begin(),
-                  prob_idx.end(),
-                  pair_comp_second_rev<int, double>);
-        if (cutoff_prob < 1.0) {
-            double cum_prob = 0.0;
-            cutoff_len = 0;
-            for (size_t i = 0; i < prob_idx.size(); ++i) {
-                cum_prob += prob_idx[i].second;
-                cutoff_len += 1;
-                if (cum_prob >= cutoff_prob || cutoff_len >= cutoff_top_n)
-                    break;
-            }
-        } else {
-            // Only the top-n limit is active. Previously cutoff_len was left
-            // at the full size here, so cutoff_top_n had no effect. At least
-            // one entry is kept, matching the cumulative branch above.
-            cutoff_len =
-                std::min(cutoff_len, std::max<size_t>(cutoff_top_n, 1));
-        }
-        prob_idx.resize(cutoff_len);
-    }
-
-    std::vector<std::pair<size_t, float>> log_prob_idx;
-    log_prob_idx.reserve(cutoff_len);
-    for (size_t i = 0; i < cutoff_len; ++i) {
-        log_prob_idx.emplace_back(
-            static_cast<size_t>(prob_idx[i].first),
-            static_cast<float>(std::log(prob_idx[i].second + NUM_FLT_MIN)));
-    }
-    return log_prob_idx;
-}
-
-
-// Turn the leading `beam_size` prefixes into (score, text) pairs.
-// The selected prefixes are ordered with prefix_compare, each path is read
-// back from the trie, mapped through `vocabulary` (kSPACE becomes tSPACE) and
-// paired with the negated approx_ctc of its prefix.
-std::vector<std::pair<double, std::string>> get_beam_search_result(
-    const std::vector<PathTrie *> &prefixes,
-    const std::vector<std::string> &vocabulary,
-    size_t beam_size) {
-    // allow for the post processing: work on a copy of the first
-    // min(beam_size, prefixes.size()) entries
-    const size_t num_kept = std::min(beam_size, prefixes.size());
-    std::vector<PathTrie *> space_prefixes(prefixes.begin(),
-                                           prefixes.begin() + num_kept);
-    std::sort(space_prefixes.begin(), space_prefixes.end(), prefix_compare);
-
-    std::vector<std::pair<double, std::string>> output_vecs;
-    for (PathTrie *prefix : space_prefixes) {
-        std::vector<int> path;
-        prefix->get_path_vec(path);
-
-        // convert index to string
-        std::string text;
-        for (int label : path) {
-            const std::string ch = vocabulary[label];
-            text += (ch == kSPACE) ? tSPACE : ch;
-        }
-        output_vecs.emplace_back(
-            std::pair<double, std::string>(-prefix->approx_ctc, text));
-    }
-
-    return output_vecs;
-}
-
-// Number of UTF-8 code points in `str`: every byte that is not a
-// continuation byte (bit pattern 10xxxxxx) starts a new character.
-size_t get_utf8_str_len(const std::string &str) {
-    size_t num_chars = 0;
-    for (size_t pos = 0; pos < str.size(); ++pos) {
-        const char byte = str[pos];
-        if ((byte & 0xc0) == 0x80) {
-            continue;  // continuation byte, belongs to the previous character
-        }
-        ++num_chars;
-    }
-    return num_chars;
-}
-
-// Split `str` into its UTF-8 characters, one std::string per character.
-// A byte that is not a continuation byte (10xxxxxx) closes the character
-// collected so far. The final pending piece is always appended, so an empty
-// input yields a vector holding one empty string.
-std::vector<std::string> split_utf8_str(const std::string &str) {
-    std::vector<std::string> pieces;
-    std::string pending;
-
-    for (size_t pos = 0; pos < str.size(); ++pos) {
-        const char byte = str[pos];
-        const bool starts_new_char = (byte & 0xc0) != 0x80;
-        if (starts_new_char && !pending.empty()) {
-            pieces.push_back(pending);
-            pending.clear();
-        }
-        pending.push_back(byte);
-    }
-
-    pieces.push_back(pending);
-    return pieces;
-}
-
-// Split `s` on every occurrence of `delim`. Empty pieces (leading, trailing
-// or between adjacent delimiters) are dropped, e.g. "FooBarFoo" split on
-// "Foo" returns ["Bar"].
-//
-// An empty delimiter cannot advance the scan (find() would keep matching at
-// the same position forever), so in that case the whole string is returned
-// as a single piece, or nothing if `s` is empty.
-std::vector<std::string> split_str(const std::string &s,
-                                   const std::string &delim) {
-    std::vector<std::string> result;
-    if (delim.empty()) {
-        if (!s.empty()) {
-            result.push_back(s);
-        }
-        return result;
-    }
-
-    std::size_t start = 0, delim_len = delim.size();
-    while (true) {
-        std::size_t end = s.find(delim, start);
-        if (end == std::string::npos) {
-            // tail after the last delimiter
-            if (start < s.size()) {
-                result.push_back(s.substr(start));
-            }
-            break;
-        }
-        if (end > start) {
-            result.push_back(s.substr(start, end - start));
-        }
-        start = end + delim_len;
-    }
-    return result;
-}
-
-// Ordering for beam prefixes: higher score first; equal scores are ordered by
-// ascending character id (identical score and character compare as equal).
-bool prefix_compare(const PathTrie *x, const PathTrie *y) {
-    if (x->score != y->score) {
-        return x->score > y->score;
-    }
-    return x->character < y->character;
-}
-
-// Append `word` (a sequence of labels) to `dictionary` as a linear chain of
-// new states hanging off the start state; the last state of the chain is
-// marked final. The start state is created on first use.
-//
-// An empty word is ignored: there is no chain to add, and the original code
-// would have passed an uninitialized state id to SetFinal().
-void add_word_to_fst(const std::vector<int> &word,
-                     fst::StdVectorFst *dictionary) {
-    if (word.empty()) {
-        return;
-    }
-    if (dictionary->NumStates() == 0) {
-        fst::StdVectorFst::StateId start = dictionary->AddState();
-        assert(start == 0);
-        dictionary->SetStart(start);
-    }
-    fst::StdVectorFst::StateId src = dictionary->Start();
-    fst::StdVectorFst::StateId dst = src;
-    for (auto c : word) {
-        dst = dictionary->AddState();
-        // same label on input and output side, zero weight
-        dictionary->AddArc(src, fst::StdArc(c, c, 0, dst));
-        src = dst;
-    }
-    dictionary->SetFinal(dst, fst::StdArc::Weight::One());
-}
-
-// Convert `word` to label ids and add it to the FST `dictionary`.
-// Each UTF-8 character is looked up in `char_map`; a literal " " maps to
-// SPACE_ID. When `add_space` is set, SPACE_ID is appended after the word.
-// Returns false, without touching the dictionary, as soon as a character is
-// missing from `char_map`; returns true after the word has been added.
-bool add_word_to_dictionary(
-    const std::string &word,
-    const std::unordered_map<std::string, int> &char_map,
-    bool add_space,
-    int SPACE_ID,
-    fst::StdVectorFst *dictionary) {
-    std::vector<int> labels;
-
-    for (const std::string &ch : split_utf8_str(word)) {
-        if (ch == " ") {
-            labels.push_back(SPACE_ID);
-            continue;
-        }
-        const auto entry = char_map.find(ch);
-        if (entry == char_map.end()) {
-            return false;  // return without adding
-        }
-        labels.push_back(entry->second);
-    }
-
-    if (add_space) {
-        labels.push_back(SPACE_ID);
-    }
-
-    add_word_to_fst(labels, dictionary);
-    return true;  // return with successful adding
-}
--- a/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h
@ -1,111 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef DECODER_UTILS_H_
-#define DECODER_UTILS_H_
-
-#include <string>
-#include <utility>
-#include "fst/log.h"
-#include "path_trie.h"
-
-// Vocabulary token that stands for a space, and the text emitted in its
-// place when decoded labels are turned back into a string.
-const std::string kSPACE = "<space>";
-const std::string tSPACE = " ";
-// Largest finite float; its negation is used as the initial value of log
-// probabilities and scores.
-const float NUM_FLT_INF = std::numeric_limits<float>::max();
-// Smallest positive normalized float; added to a probability before log().
-const float NUM_FLT_MIN = std::numeric_limits<float>::min();
-
-// Validation helper behind the VALID_CHECK* macros. When `x` is false it
-// prints the "[file:line] " location to stdout and then emits a LOG(FATAL)
-// message naming the failed expression `expr` followed by `err`.
-inline void check(
-    bool x, const char *expr, const char *file, int line, const char *err) {
-    if (x) {
-        return;  // condition holds, nothing to report
-    }
-    std::cout << "[" << file << ":" << line << "] ";
-    LOG(FATAL) << "\"" << expr << "\" check failed. " << err;
-}
-
-// VALID_CHECK(x, info): forwards the condition, its source text and the
-// current file/line to check(); `info` is the message reported on failure.
-#define VALID_CHECK(x, info) \
-    check(static_cast<bool>(x), #x, __FILE__, __LINE__, info)
-// Comparison shorthands built on VALID_CHECK (equal / greater / less).
-#define VALID_CHECK_EQ(x, y, info) VALID_CHECK((x) == (y), info)
-#define VALID_CHECK_GT(x, y, info) VALID_CHECK((x) > (y), info)
-#define VALID_CHECK_LT(x, y, info) VALID_CHECK((x) < (y), info)
-
-
-// Comparator that orders pairs by descending `first`.
-template <typename T1, typename T2>
-bool pair_comp_first_rev(const std::pair<T1, T2> &a,
-                         const std::pair<T1, T2> &b) {
-    const T1 &lhs = a.first;
-    const T1 &rhs = b.first;
-    return lhs > rhs;
-}
-
-// Comparator that orders pairs by descending `second`.
-template <typename T1, typename T2>
-bool pair_comp_second_rev(const std::pair<T1, T2> &a,
-                          const std::pair<T1, T2> &b) {
-    const T2 &lhs = a.second;
-    const T2 &rhs = b.second;
-    return lhs > rhs;
-}
-
-// Return log(exp(x) + exp(y)), i.e. the sum of two probabilities given in
-// log scale. An operand at or below -numeric_limits<T>::max() is treated as
-// "zero probability" and the other operand is returned unchanged. The larger
-// operand is factored out before exponentiating.
-template <typename T>
-T log_sum_exp(const T &x, const T &y) {
-    static T log_zero = -std::numeric_limits<T>::max();
-    if (x <= log_zero) {
-        return y;
-    }
-    if (y <= log_zero) {
-        return x;
-    }
-    const T pivot = (x < y) ? y : x;
-    const T scaled_sum = std::exp(x - pivot) + std::exp(y - pivot);
-    return std::log(scaled_sum) + pivot;
-}
-
-// Get pruned probability vector for each time step's beam search.
-// Returns (vocabulary index, log probability) pairs.
-std::vector<std::pair<size_t, float>> get_pruned_log_probs(
-    const std::vector<double> &prob_step,
-    double cutoff_prob,
-    size_t cutoff_top_n);
-
-// Get beam search result from prefixes in trie tree,
-// as (score, decoded string) pairs.
-std::vector<std::pair<double, std::string>> get_beam_search_result(
-    const std::vector<PathTrie *> &prefixes,
-    const std::vector<std::string> &vocabulary,
-    size_t beam_size);
-
-// Comparison function for prefixes (usable as a std::sort predicate)
-bool prefix_compare(const PathTrie *x, const PathTrie *y);
-
-/* Get length of utf8 encoding string
- * See: http://stackoverflow.com/a/4063229
- */
-size_t get_utf8_str_len(const std::string &str);
-
-/* Split a string into a list of strings on a given string
- * delimiter. NB: delimiters on beginning / end of string are
- * trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"].
- */
-std::vector<std::string> split_str(const std::string &s,
-                                   const std::string &delim);
-
-/* Splits string into vector of strings representing
- * UTF-8 characters (not same as chars)
- */
-std::vector<std::string> split_utf8_str(const std::string &str);
-
-// Add a word, given as label indices, to the FST dictionary
-void add_word_to_fst(const std::vector<int> &word,
-                     fst::StdVectorFst *dictionary);
-
-// Add a word, given as a string, to the FST dictionary.
-// Returns true if the word was added.
-bool add_word_to_dictionary(
-    const std::string &word,
-    const std::unordered_map<std::string, int> &char_map,
-    bool add_space,
-    int SPACE_ID,
-    fst::StdVectorFst *dictionary);
-#endif  // DECODER_UTILS_H_
--- a/speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp
@ -1,164 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "path_trie.h"
-
-#include <algorithm>
-#include <limits>
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "decoder_utils.h"
-
-// Construct a root node: every log probability and score starts at
-// -NUM_FLT_INF, the node carries the ROOT_ character, has no parent and no
-// dictionary or matcher attached.
-PathTrie::PathTrie() {
-    log_prob_b_prev = -NUM_FLT_INF;
-    log_prob_nb_prev = -NUM_FLT_INF;
-    log_prob_b_cur = -NUM_FLT_INF;
-    log_prob_nb_cur = -NUM_FLT_INF;
-    score = -NUM_FLT_INF;
-    // Previously left uninitialized although get_beam_search_result() reads
-    // it; give it the same sentinel as the other scores.
-    approx_ctc = -NUM_FLT_INF;
-
-    ROOT_ = -1;
-    character = ROOT_;
-    exists_ = true;
-    parent = nullptr;
-
-    dictionary_ = nullptr;
-    dictionary_state_ = 0;
-    has_dictionary_ = false;
-
-    matcher_ = nullptr;
-}
-
-// Recursively destroy the subtree: every child node is owned by its parent.
-PathTrie::~PathTrie() {
-    for (auto it = children_.begin(); it != children_.end(); ++it) {
-        delete it->second;
-    }
-}
-
-// Return the child reached by appending `new_char` to this prefix.
-//  - An existing child is reused; if it had been marked as removed it is
-//    revived with all log probabilities reset to -NUM_FLT_INF.
-//  - Without a dictionary a new child is always created.
-//  - With a dictionary a child is created only when the FST has an arc
-//    labelled new_char + 1 out of the current dictionary state; otherwise
-//    nullptr is returned and, if the current state is final and `reset` is
-//    set, the dictionary state is rewound to the FST start state.
-PathTrie* PathTrie::get_path_trie(int new_char, bool reset) {
-    // look for an already materialised child first
-    for (auto& entry : children_) {
-        if (entry.first != new_char) {
-            continue;
-        }
-        PathTrie* existing = entry.second;
-        if (!existing->exists_) {
-            existing->exists_ = true;
-            existing->log_prob_b_prev = -NUM_FLT_INF;
-            existing->log_prob_nb_prev = -NUM_FLT_INF;
-            existing->log_prob_b_cur = -NUM_FLT_INF;
-            existing->log_prob_nb_cur = -NUM_FLT_INF;
-        }
-        return existing;
-    }
-
-    if (!has_dictionary_) {
-        PathTrie* plain_child = new PathTrie;
-        plain_child->character = new_char;
-        plain_child->parent = this;
-        children_.push_back(std::make_pair(new_char, plain_child));
-        return plain_child;
-    }
-
-    matcher_->SetState(dictionary_state_);
-    if (!matcher_->Find(new_char + 1)) {
-        // Adding this character causes word outside dictionary
-        const bool is_final = (dictionary_->Final(dictionary_state_) !=
-                               fst::TropicalWeight::Zero());
-        if (is_final && reset) {
-            dictionary_state_ = dictionary_->Start();
-        }
-        return nullptr;
-    }
-
-    PathTrie* dict_child = new PathTrie;
-    dict_child->character = new_char;
-    dict_child->parent = this;
-    dict_child->dictionary_ = dictionary_;
-    dict_child->dictionary_state_ = matcher_->Value().nextstate;
-    dict_child->has_dictionary_ = true;
-    dict_child->matcher_ = matcher_;
-    children_.push_back(std::make_pair(new_char, dict_child));
-    return dict_child;
-}
-
-// Collect the labels from the root down to this node into `output`.
-PathTrie* PathTrie::get_path_vec(std::vector<int>& output) {
-    return get_path_vec(output, ROOT_);
-}
-
-// Walk from this node towards the root, appending each node's character to
-// `output`, until a node whose character is `stop` or ROOT_ is reached or
-// `output` holds `max_steps` entries. `output` is then reversed in place and
-// the node at which the walk stopped is returned.
-PathTrie* PathTrie::get_path_vec(std::vector<int>& output,
-                                 int stop,
-                                 size_t max_steps) {
-    PathTrie* node = this;
-    while (node->character != stop && node->character != node->ROOT_ &&
-           output.size() != max_steps) {
-        output.push_back(node->character);
-        node = node->parent;
-    }
-    std::reverse(output.begin(), output.end());
-    return node;
-}
-
-// Pre-order walk of the subtree. Every node that still exists rolls its
-// current log probabilities over into the "previous" slots, resets the
-// current ones to -NUM_FLT_INF, recomputes its score and is appended to
-// `output`. Children are visited regardless of whether this node exists.
-void PathTrie::iterate_to_vec(std::vector<PathTrie*>& output) {
-    if (exists_) {
-        log_prob_b_prev = log_prob_b_cur;
-        log_prob_b_cur = -NUM_FLT_INF;
-
-        log_prob_nb_prev = log_prob_nb_cur;
-        log_prob_nb_cur = -NUM_FLT_INF;
-
-        score = log_sum_exp(log_prob_b_prev, log_prob_nb_prev);
-        output.push_back(this);
-    }
-    for (size_t i = 0; i < children_.size(); ++i) {
-        children_[i].second->iterate_to_vec(output);
-    }
-}
-
-// Mark this node as removed. A node that still has children only gets the
-// flag cleared; a leaf is additionally unlinked from its parent and deleted.
-// NOTE: after this call the object may have been destroyed (`delete this`),
-// so the caller must not touch it again.
-void PathTrie::remove() {
-    exists_ = false;
-    if (children_.size() == 0) {
-        if (parent != nullptr) {
-            // unlink this node from the parent's child list
-            auto child = parent->children_.begin();
-            for (child = parent->children_.begin();
-                 child != parent->children_.end();
-                 ++child) {
-                if (child->first == character) {
-                    parent->children_.erase(child);
-                    break;
-                }
-            }
-            // a parent that was itself removed earlier and has just lost
-            // its last child can now be cleaned up as well
-            if (parent->children_.size() == 0 && !parent->exists_) {
-                parent->remove();
-            }
-        }
-        delete this;
-    }
-}
-
-
-// Attach an FST dictionary to this node and position it at the FST's start
-// state. The pointer is stored as-is; this function does not copy the FST.
-void PathTrie::set_dictionary(fst::StdVectorFst* dictionary) {
-    has_dictionary_ = true;
-    dictionary_ = dictionary;
-    dictionary_state_ = dictionary_->Start();
-}
-
-using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;
-// Store the shared matcher used to look up arcs in the dictionary FST.
-void PathTrie::set_matcher(std::shared_ptr<FSTMATCH> matcher) {
-    matcher_ = std::move(matcher);
-}
--- a/speechx/speechx/asr/decoder/ctc_decoders/path_trie.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.h
@ -1,82 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef PATH_TRIE_H
-#define PATH_TRIE_H
-
-#include <algorithm>
-#include <limits>
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "fst/fstlib.h"
-
-/* Trie tree for prefix storing and manipulating, with a dictionary in
- * finite-state transducer for spelling correction.
- *
- * Each node represents one label appended to its parent's prefix and owns
- * its child nodes (they are deleted in the destructor).
- */
-class PathTrie {
-  public:
-    PathTrie();
-    ~PathTrie();
-
-    // get new prefix after appending new char; returns nullptr when a
-    // dictionary is attached and the char would leave the dictionary
-    PathTrie* get_path_trie(int new_char, bool reset = true);
-
-    // get the prefix in index from root to current node
-    PathTrie* get_path_vec(std::vector<int>& output);
-
-    // get the prefix in index from some stop node to current node,
-    // collecting at most max_steps labels
-    PathTrie* get_path_vec(
-        std::vector<int>& output,
-        int stop,
-        size_t max_steps = std::numeric_limits<size_t>::max());
-
-    // update log probs and collect the existing nodes of the subtree
-    void iterate_to_vec(std::vector<PathTrie*>& output);
-
-    // set dictionary for FST
-    void set_dictionary(fst::StdVectorFst* dictionary);
-
-    // set the matcher used to search arcs in the dictionary FST
-    void set_matcher(std::shared_ptr<fst::SortedMatcher<fst::StdVectorFst>>);
-
-    // true when this node is the root (its character is ROOT_)
-    bool is_empty() { return ROOT_ == character; }
-
-    // remove current path from root
-    void remove();
-
-    // log probabilities of the prefix ending in blank (b) / non-blank (nb),
-    // for the previous and the current time step
-    float log_prob_b_prev;
-    float log_prob_nb_prev;
-    float log_prob_b_cur;
-    float log_prob_nb_cur;
-    // combined score of the prefix, recomputed in iterate_to_vec()
-    float score;
-    // NOTE(review): not set anywhere in this class; presumably filled in by
-    // the beam search decoder before results are read - confirm.
-    float approx_ctc;
-    // label of this node (ROOT_ for the root)
-    int character;
-    PathTrie* parent;
-
-  private:
-    // sentinel character id of the root node
-    int ROOT_;
-    // false once the node has been removed but is kept for its children
-    bool exists_;
-    bool has_dictionary_;
-
-    // (label, child node) pairs
-    std::vector<std::pair<int, PathTrie*>> children_;
-
-    // pointer to dictionary of FST
-    fst::StdVectorFst* dictionary_;
-    fst::StdVectorFst::StateId dictionary_state_;
-    // matcher used for finding arcs in the FST
-    std::shared_ptr<fst::SortedMatcher<fst::StdVectorFst>> matcher_;
-};
-
-#endif  // PATH_TRIE_H
--- a/speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp
@ -1,232 +0,0 @@
-// Licensed under GNU Lesser General Public License v3 (LGPLv3) (LGPL-3) (the
-// "COPYING.LESSER.3");
-
-#include "scorer.h"
-
-#include <unistd.h>
-#include <iostream>
-
-#include "lm/config.hh"
-#include "lm/model.hh"
-#include "lm/state.hh"
-
-#include "decoder_utils.h"
-
-using namespace lm::ngram;
-// On Windows, F_OK is not provided by the system headers, so define it here.
-#define    F_OK    0
-// Build a scorer with language model weight `alpha` and word insertion
-// weight `beta`; loads the language model at `lm_path` and prepares the
-// character map (and, for word based models, the FST dictionary) from
-// `vocab_list`.
-Scorer::Scorer(double alpha,
-               double beta,
-               const std::string& lm_path,
-               const std::vector<std::string>& vocab_list)
-    : alpha(alpha),
-      beta(beta),
-      dictionary(nullptr),
-      language_model_(nullptr),
-      is_character_based_(true),
-      max_order_(0),
-      dict_size_(0),
-      SPACE_ID_(-1) {
-    setup(lm_path, vocab_list);
-}
-
-// Release the language model and the FST dictionary. Both are stored as
-// void* and must be cast back to their real type before deletion; deleting a
-// null pointer is a no-op, so no explicit null checks are needed.
-Scorer::~Scorer() {
-    delete static_cast<lm::base::Model*>(language_model_);
-    delete static_cast<fst::StdVectorFst*>(dictionary);
-}
-
-// One-time initialisation: load the language model, build the char map and,
-// for word based language models only, fill the FST dictionary.
-void Scorer::setup(const std::string& lm_path,
-                   const std::vector<std::string>& vocab_list) {
-    load_lm(lm_path);
-    set_char_map(vocab_list);
-    if (is_character_based()) {
-        return;  // character based models need no word dictionary
-    }
-    fill_dictionary(true);
-}
-
-// Load the language model from `lm_path` and record its order and
-// vocabulary. The model counts as word based as soon as one vocabulary entry
-// other than the <unk>/<s>/</s> tokens is longer than one UTF-8 character.
-void Scorer::load_lm(const std::string& lm_path) {
-    const char* filename = lm_path.c_str();
-    VALID_CHECK_EQ(access(filename, F_OK), 0, "Invalid language model path");
-
-    // collect the vocabulary while the model is being loaded
-    RetriveStrEnumerateVocab enumerate;
-    lm::ngram::Config config;
-    config.enumerate_vocab = &enumerate;
-    language_model_ = lm::ngram::LoadVirtual(filename, config);
-    max_order_ = static_cast<lm::base::Model*>(language_model_)->Order();
-    vocabulary_ = enumerate.vocabulary;
-
-    for (const std::string& token : vocabulary_) {
-        if (!is_character_based_) {
-            break;  // already decided, nothing more to learn
-        }
-        const bool is_special = (token == UNK_TOKEN) ||
-                                (token == START_TOKEN) ||
-                                (token == END_TOKEN);
-        if (!is_special && get_utf8_str_len(token) > 1) {
-            is_character_based_ = false;
-        }
-    }
-}
-
-// Score `words` as one n-gram and return the log10 probability of its last
-// word given the preceding ones. Returns OOV_SCORE as soon as a word is not
-// in the language model vocabulary, and 0.0 for an empty n-gram (the value
-// was previously uninitialized in that case).
-double Scorer::get_log_cond_prob(const std::vector<std::string>& words) {
-    lm::base::Model* model = static_cast<lm::base::Model*>(language_model_);
-    double cond_prob = 0.0;
-    lm::ngram::State state, tmp_state, out_state;
-    // avoid inserting <s> at the beginning
-    model->NullContextWrite(&state);
-    for (size_t i = 0; i < words.size(); ++i) {
-        lm::WordIndex word_index = model->BaseVocabulary().Index(words[i]);
-        // encounter OOV
-        if (word_index == 0) {
-            return OOV_SCORE;
-        }
-        cond_prob = model->BaseScore(&state, word_index, &out_state);
-        // swap: the state produced for this word is the context of the next
-        tmp_state = state;
-        state = out_state;
-        out_state = tmp_state;
-    }
-    // return  log10 prob
-    return cond_prob;
-}
-
-// Log probability of a whole sentence. The words are padded in front with
-// START_TOKEN (max_order_ of them for an empty sentence, max_order_ - 1
-// otherwise) and terminated with END_TOKEN before being scored.
-double Scorer::get_sent_log_prob(const std::vector<std::string>& words) {
-    const size_t num_start_tokens =
-        (words.size() == 0) ? max_order_ : max_order_ - 1;
-
-    std::vector<std::string> sentence(num_start_tokens, START_TOKEN);
-    sentence.insert(sentence.end(), words.begin(), words.end());
-    sentence.push_back(END_TOKEN);
-    return get_log_prob(sentence);
-}
-
-// Sum the conditional log probabilities of every window of max_order_
-// consecutive words in `words`. `words` must be longer than max_order_.
-double Scorer::get_log_prob(const std::vector<std::string>& words) {
-    assert(words.size() > max_order_);
-    double total = 0.0;
-    auto window_begin = words.begin();
-    for (size_t remaining = words.size() - max_order_ + 1; remaining > 0;
-         --remaining, ++window_begin) {
-        const std::vector<std::string> ngram(window_begin,
-                                             window_begin + max_order_);
-        total += get_log_cond_prob(ngram);
-    }
-    return total;
-}
-
-// Replace the language model weight (alpha) and the word insertion weight
-// (beta).
-void Scorer::reset_params(float alpha, float beta) {
-    this->beta = beta;
-    this->alpha = alpha;
-}
-
-// Concatenate the char_list_ entries selected by the indices in `input`.
-std::string Scorer::vec2str(const std::vector<int>& input) {
-    std::string word;
-    for (size_t pos = 0; pos < input.size(); ++pos) {
-        word.append(char_list_[input[pos]]);
-    }
-    return word;
-}
-
-// Convert label indices to text and split it into scoring units: UTF-8
-// characters for a character based model, space separated words otherwise.
-// Empty input yields an empty result.
-std::vector<std::string> Scorer::split_labels(const std::vector<int>& labels) {
-    if (labels.empty()) {
-        return {};
-    }
-    const std::string text = vec2str(labels);
-    return is_character_based_ ? split_utf8_str(text) : split_str(text, " ");
-}
-
-// Store the character list and rebuild the token -> FST label map.
-// SPACE_ID_ records the index of the kSPACE token when it is present.
-void Scorer::set_char_map(const std::vector<std::string>& char_list) {
-    char_list_ = char_list;
-    char_map_.clear();
-
-    // Set the char map for the FST for spelling correction
-    for (size_t idx = 0; idx < char_list_.size(); idx++) {
-        const std::string& token = char_list_[idx];
-        if (token == kSPACE) {
-            SPACE_ID_ = idx;
-        }
-        // State 0 is the initial state of the FST, so labels start from 1 to
-        // avoid a clash with it; otherwise decoding results would be wrong.
-        char_map_[token] = idx + 1;
-    }
-}
-
-// Build the n-gram (max_order_ units) that ends at `prefix`. Units are read
-// backwards from the trie - single characters for a character based model,
-// space delimited words otherwise - and the list is padded with START_TOKEN
-// when the root is reached early. The result is returned in reading order.
-std::vector<std::string> Scorer::make_ngram(PathTrie* prefix) {
-    std::vector<std::string> ngram;
-    PathTrie* current_node = prefix;
-
-    for (int order = 0; order < max_order_; order++) {
-        std::vector<int> prefix_vec;
-        PathTrie* new_node = nullptr;
-
-        if (is_character_based_) {
-            new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_, 1);
-            current_node = new_node;
-        } else {
-            new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_);
-            current_node = new_node->parent;  // Skipping spaces
-        }
-
-        // reconstruct word
-        ngram.push_back(vec2str(prefix_vec));
-
-        if (new_node->character != -1) {
-            continue;
-        }
-        // Root reached: no more spaces, but still need order
-        ngram.insert(ngram.end(), max_order_ - order - 1, START_TOKEN);
-        break;
-    }
-    std::reverse(ngram.begin(), ngram.end());
-    return ngram;
-}
-
-// Build the FST dictionary from the language model vocabulary and store it
-// in `dictionary`; dict_size_ receives the number of words actually added.
-void Scorer::fill_dictionary(bool add_space) {
-    fst::StdVectorFst word_fst;
-
-    // For each unigram convert to ints and put in trie
-    size_t num_added = 0;
-    for (const std::string& word : vocabulary_) {
-        if (add_word_to_dictionary(
-                word, char_map_, add_space, SPACE_ID_ + 1, &word_fst)) {
-            ++num_added;
-        }
-    }
-    dict_size_ = num_added;
-
-    /* Simplify FST
-     *
-     * Remove "epsilon" transitions, i.e. transitions that can be taken
-     * without consuming input. This is required before the FST can be made
-     * deterministic, but can greatly increase its size.
-     */
-    fst::RmEpsilon(&word_fst);
-
-    /* Make the FST deterministic: for any input string there is only one
-     * state the FST can be in. The decoder assumes this, otherwise it would
-     * have to check multiple transitions at each state.
-     */
-    fst::StdVectorFst* compact_fst = new fst::StdVectorFst;
-    fst::Determinize(word_fst, compact_fst);
-
-    /* Find the simplest equivalent FST. Not strictly necessary, but it
-     * decreases the memory usage of the dictionary.
-     */
-    fst::Minimize(compact_fst);
-    this->dictionary = compact_fst;
-}
--- a/speechx/speechx/asr/decoder/ctc_decoders/scorer.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/scorer.h
@ -1,114 +0,0 @@
-// Licensed under GNU Lesser General Public License v3 (LGPLv3) (LGPL-3) (the
-// "COPYING.LESSER.3");
-
-#ifndef SCORER_H_
-#define SCORER_H_
-
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-#include "lm/enumerate_vocab.hh"
-#include "lm/virtual_interface.hh"
-#include "lm/word_index.hh"
-
-#include "path_trie.h"
-
-// Score returned for an n-gram that contains an out-of-vocabulary word.
-const double OOV_SCORE = -1000.0;
-// Special tokens of the language model: sentence start, unknown word,
-// sentence end.
-const std::string START_TOKEN = "<s>";
-const std::string UNK_TOKEN = "<unk>";
-const std::string END_TOKEN = "</s>";
-
-// Implement a callback to retrieve the dictionary of language model.
-// Every word reported through Add() is appended to `vocabulary`.
-class RetriveStrEnumerateVocab : public lm::EnumerateVocab {
-  public:
-    RetriveStrEnumerateVocab() {}
-
-    // Called once per vocabulary entry; `index` is not used, only the word
-    // text is stored.
-    void Add(lm::WordIndex index, const StringPiece &str) {
-        vocabulary.push_back(std::string(str.data(), str.length()));
-    }
-
-    // words collected so far, in the order they were reported
-    std::vector<std::string> vocabulary;
-};
-
-/* External scorer to query score for n-gram or sentence, including language
- * model scoring and word insertion.
- *
- * Example:
- *     Scorer scorer(alpha, beta, "path_of_language_model", vocabulary);
- *     scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" });
- *     scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" });
- */
-class Scorer {
-  public:
-    Scorer(double alpha,
-           double beta,
-           const std::string &lm_path,
-           const std::vector<std::string> &vocabulary);
-    ~Scorer();
-
-    // log10 conditional probability of the last word of the n-gram `words`
-    double get_log_cond_prob(const std::vector<std::string> &words);
-
-    // log probability of the sentence `words`, padded with <s> and </s>
-    double get_sent_log_prob(const std::vector<std::string> &words);
-
-    // return the max order
-    size_t get_max_order() const { return max_order_; }
-
-    // return the dictionary size of language model
-    size_t get_dict_size() const { return dict_size_; }
-
-    // return true if the language model is character based
-    bool is_character_based() const { return is_character_based_; }
-
-    // reset params alpha & beta
-    void reset_params(float alpha, float beta);
-
-    // make ngram for a given prefix
-    std::vector<std::string> make_ngram(PathTrie *prefix);
-
-    // transform the labels in index to the vector of words (word based lm) or
-    // the vector of characters (character based lm)
-    std::vector<std::string> split_labels(const std::vector<int> &labels);
-
-    // language model weight
-    double alpha;
-    // word insertion weight
-    double beta;
-
-    // pointer to the dictionary of FST (an fst::StdVectorFst owned by the
-    // scorer; nullptr until fill_dictionary() has run)
-    void *dictionary;
-
-  protected:
-    // necessary setup: load language model, set char map, fill FST's dictionary
-    void setup(const std::string &lm_path,
-               const std::vector<std::string> &vocab_list);
-
-    // load language model from given path
-    void load_lm(const std::string &lm_path);
-
-    // fill dictionary for FST
-    void fill_dictionary(bool add_space);
-
-    // set char map
-    void set_char_map(const std::vector<std::string> &char_list);
-
-    // sum of the conditional log probabilities of all n-grams in `words`
-    double get_log_prob(const std::vector<std::string> &words);
-
-    // translate the vector in index to string
-    std::string vec2str(const std::vector<int> &input);
-
-  private:
-    // the loaded language model (an lm::base::Model owned by the scorer)
-    void *language_model_;
-    bool is_character_based_;
-    size_t max_order_;
-    size_t dict_size_;
-
-    // index of the space token in char_list_, -1 when there is none
-    int SPACE_ID_;
-    std::vector<std::string> char_list_;
-    // token -> FST label (index in char_list_ plus one)
-    std::unordered_map<std::string, int> char_map_;
-
-    // vocabulary of the language model
-    std::vector<std::string> vocabulary_;
-};
-
-#endif  // SCORER_H_
--- a/speechx/speechx/asr/decoder/nnet_logprob_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/nnet_logprob_decoder_main.cc
@ -1,77 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// todo refactor, repalce with gtest
-
-#include "base/flags.h"
-#include "base/log.h"
-#include "decoder/ctc_beam_search_decoder.h"
-#include "kaldi/util/table-types.h"
-#include "nnet/decodable.h"
-
-DEFINE_string(nnet_prob_respecifier, "", "test nnet prob rspecifier");
-DEFINE_string(dict_file, "vocab.txt", "vocabulary of lm");
-DEFINE_string(lm_path, "lm.klm", "language model");
-
-using kaldi::BaseFloat;
-using kaldi::Matrix;
-using std::vector;
-
-// test decoder by feeding nnet posterior probability
-int main(int argc, char* argv[]) {
-    gflags::SetUsageMessage("Usage:");
-    gflags::ParseCommandLineFlags(&argc, &argv, false);
-    google::InitGoogleLogging(argv[0]);
-    google::InstallFailureSignalHandler();
-    FLAGS_logtostderr = 1;
-
-    kaldi::SequentialBaseFloatMatrixReader likelihood_reader(
-        FLAGS_nnet_prob_respecifier);
-    std::string dict_file = FLAGS_dict_file;
-    std::string lm_path = FLAGS_lm_path;
-    LOG(INFO) << "dict path: " << dict_file;
-    LOG(INFO) << "lm path: " << lm_path;
-
-    int32 num_done = 0, num_err = 0;
-
-    ppspeech::CTCBeamSearchOptions opts;
-    opts.dict_file = dict_file;
-    opts.lm_path = lm_path;
-    ppspeech::CTCBeamSearch decoder(opts);
-
-    std::shared_ptr<ppspeech::Decodable> decodable(
-        new ppspeech::Decodable(nullptr, nullptr));
-
-    decoder.InitDecoder();
-
-    for (; !likelihood_reader.Done(); likelihood_reader.Next()) {
-        string utt = likelihood_reader.Key();
-        const kaldi::Matrix<BaseFloat> likelihood = likelihood_reader.Value();
-        LOG(INFO) << "process utt: " << utt;
-        LOG(INFO) << "rows: " << likelihood.NumRows();
-        LOG(INFO) << "cols: " << likelihood.NumCols();
-        decodable->Acceptlikelihood(likelihood);
-        decoder.AdvanceDecode(decodable);
-        std::string result;
-        result = decoder.GetFinalBestPath();
-        KALDI_LOG << " the result of " << utt << " is " << result;
-        decodable->Reset();
-        decoder.Reset();
-        ++num_done;
-    }
-
-    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
-              << " with errors.";
-    return (num_done != 0 ? 0 : 1);
-}
--- a/speechx/speechx/asr/decoder/param.h
+++ b/speechx/speechx/asr/decoder/param.h
@ -15,8 +15,7 @@
 #pragma once

 #include "base/common.h"
-#include "decoder/ctc_beam_search_decoder.h"
-#include "decoder/ctc_tlg_decoder.h"
+//#include "decoder/ctc_tlg_decoder.h"

 // feature
 DEFINE_bool(use_fbank, false, "False for fbank; or linear feature");
--- a/speechx/speechx/asr/nnet/CMakeLists.txt
+++ b/speechx/speechx/asr/nnet/CMakeLists.txt
@ -1,30 +1,12 @@
 set(srcs decodable.cc nnet_producer.cc)

-if(USING_DS2)
-  list(APPEND srcs ds2_nnet.cc)
-endif()
-
-if(USING_U2)
 list(APPEND srcs u2_nnet.cc)
-endif()

 add_library(nnet STATIC ${srcs})
 target_link_libraries(nnet utils)

-if(USING_U2)
 target_compile_options(nnet  PUBLIC ${PADDLE_COMPILE_FLAGS})
 target_include_directories(nnet  PUBLIC ${pybind11_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR})
-endif()
-
-
-if(USING_DS2)
-  set(bin_name ds2_nnet_main)
-  add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
-  target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-  target_link_libraries(${bin_name} utils kaldi-util kaldi-matrix gflags glog nnet)
-
-  target_link_libraries(${bin_name} ${DEPS})
-endif()

 # test bin
 #if(USING_U2)
--- a/speechx/speechx/asr/nnet/ds2_nnet.cc
+++ b/speechx/speechx/asr/nnet/ds2_nnet.cc
@ -1,218 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "nnet/ds2_nnet.h"
-
-#include "utils/strings.h"
-
-namespace ppspeech {
-
-using kaldi::Matrix;
-using kaldi::Vector;
-using std::shared_ptr;
-using std::string;
-using std::vector;
-
-void PaddleNnet::InitCacheEncouts(const ModelOptions& opts) {
-    std::vector<std::string> cache_names;
-    cache_names = StrSplit(opts.cache_names, ",");
-    std::vector<std::string> cache_shapes;
-    cache_shapes = StrSplit(opts.cache_shape, ",");
-    assert(cache_shapes.size() == cache_names.size());
-
-    cache_encouts_.clear();
-    cache_names_idx_.clear();
-    for (size_t i = 0; i < cache_shapes.size(); i++) {
-        std::vector<std::string> tmp_shape;
-        tmp_shape = StrSplit(cache_shapes[i], "-");
-        std::vector<int> cur_shape;
-        std::transform(tmp_shape.begin(),
-                       tmp_shape.end(),
-                       std::back_inserter(cur_shape),
-                       [](const std::string& s) { return atoi(s.c_str()); });
-        cache_names_idx_[cache_names[i]] = i;
-        std::shared_ptr<Tensor<BaseFloat>> cache_eout =
-            std::make_shared<Tensor<BaseFloat>>(cur_shape);
-        cache_encouts_.push_back(cache_eout);
-    }
-}
-
-PaddleNnet::PaddleNnet(const ModelOptions& opts) : opts_(opts) {
-    subsampling_rate_ = opts.subsample_rate;
-    paddle_infer::Config config;
-    config.SetModel(opts.model_path, opts.param_path);
-    if (opts.use_gpu) {
-        config.EnableUseGpu(500, 0);
-    }
-    config.SwitchIrOptim(opts.switch_ir_optim);
-    if (opts.enable_fc_padding == false) {
-        config.DisableFCPadding();
-    }
-    if (opts.enable_profile) {
-        config.EnableProfile();
-    }
-    pool.reset(
-        new paddle_infer::services::PredictorPool(config, opts.thread_num));
-    if (pool == nullptr) {
-        LOG(ERROR) << "create the predictor pool failed";
-    }
-    pool_usages.resize(opts.thread_num);
-    std::fill(pool_usages.begin(), pool_usages.end(), false);
-    LOG(INFO) << "load paddle model success";
-
-    LOG(INFO) << "start to check the predictor input and output names";
-    LOG(INFO) << "input names: " << opts.input_names;
-    LOG(INFO) << "output names: " << opts.output_names;
-    std::vector<std::string> input_names_vec = StrSplit(opts.input_names, ",");
-    std::vector<std::string> output_names_vec = StrSplit(opts.output_names, ",");
-
-    paddle_infer::Predictor* predictor = GetPredictor();
-
-    std::vector<std::string> model_input_names = predictor->GetInputNames();
-    assert(input_names_vec.size() == model_input_names.size());
-    for (size_t i = 0; i < model_input_names.size(); i++) {
-        assert(input_names_vec[i] == model_input_names[i]);
-    }
-
-    std::vector<std::string> model_output_names = predictor->GetOutputNames();
-    assert(output_names_vec.size() == model_output_names.size());
-    for (size_t i = 0; i < output_names_vec.size(); i++) {
-        assert(output_names_vec[i] == model_output_names[i]);
-    }
-
-    ReleasePredictor(predictor);
-    InitCacheEncouts(opts);
-}
-
-void PaddleNnet::Reset() { InitCacheEncouts(opts_); }
-
-paddle_infer::Predictor* PaddleNnet::GetPredictor() {
-    paddle_infer::Predictor* predictor = nullptr;
-
-    std::lock_guard<std::mutex> guard(pool_mutex);
-    int pred_id = 0;
-
-    while (pred_id < pool_usages.size()) {
-        if (pool_usages[pred_id] == false) {
-            predictor = pool->Retrive(pred_id);
-            break;
-        }
-        ++pred_id;
-    }
-
-    if (predictor) {
-        pool_usages[pred_id] = true;
-        predictor_to_thread_id[predictor] = pred_id;
-    } else {
-        LOG(INFO) << "Failed to get predictor from pool !!!";
-    }
-
-    return predictor;
-}
-
-int PaddleNnet::ReleasePredictor(paddle_infer::Predictor* predictor) {
-    std::lock_guard<std::mutex> guard(pool_mutex);
-    auto iter = predictor_to_thread_id.find(predictor);
-
-    if (iter == predictor_to_thread_id.end()) {
-        LOG(INFO) << "there is no such predictor";
-        return 0;
-    }
-
-    pool_usages[iter->second] = false;
-    predictor_to_thread_id.erase(predictor);
-    return 0;
-}
-
-shared_ptr<Tensor<BaseFloat>> PaddleNnet::GetCacheEncoder(const string& name) {
-    auto iter = cache_names_idx_.find(name);
-    if (iter == cache_names_idx_.end()) {
-        return nullptr;
-    }
-    assert(iter->second < cache_encouts_.size());
-    return cache_encouts_[iter->second];
-}
-
-void PaddleNnet::FeedForward(const Vector<BaseFloat>& features,
-                             const int32& feature_dim,
-                             NnetOut* out) {
-    paddle_infer::Predictor* predictor = GetPredictor();
-
-    int feat_row = features.Dim() / feature_dim;
-
-    std::vector<std::string> input_names = predictor->GetInputNames();
-    std::vector<std::string> output_names = predictor->GetOutputNames();
-
-    // feed inputs
-    std::unique_ptr<paddle_infer::Tensor> input_tensor =
-        predictor->GetInputHandle(input_names[0]);
-    std::vector<int> INPUT_SHAPE = {1, feat_row, feature_dim};
-    input_tensor->Reshape(INPUT_SHAPE);
-    input_tensor->CopyFromCpu(features.Data());
-
-    std::unique_ptr<paddle_infer::Tensor> input_len =
-        predictor->GetInputHandle(input_names[1]);
-    std::vector<int> input_len_size = {1};
-    input_len->Reshape(input_len_size);
-    std::vector<int64_t> audio_len;
-    audio_len.push_back(feat_row);
-    input_len->CopyFromCpu(audio_len.data());
-
-    std::unique_ptr<paddle_infer::Tensor> state_h =
-        predictor->GetInputHandle(input_names[2]);
-    shared_ptr<Tensor<BaseFloat>> h_cache = GetCacheEncoder(input_names[2]);
-    state_h->Reshape(h_cache->get_shape());
-    state_h->CopyFromCpu(h_cache->get_data().data());
-
-    std::unique_ptr<paddle_infer::Tensor> state_c =
-        predictor->GetInputHandle(input_names[3]);
-    shared_ptr<Tensor<float>> c_cache = GetCacheEncoder(input_names[3]);
-    state_c->Reshape(c_cache->get_shape());
-    state_c->CopyFromCpu(c_cache->get_data().data());
-
-    // forward
-    bool success = predictor->Run();
-
-    if (success == false) {
-        LOG(INFO) << "predictor run occurs error";
-    }
-
-    // fetch outpus
-    std::unique_ptr<paddle_infer::Tensor> h_out =
-        predictor->GetOutputHandle(output_names[2]);
-    assert(h_cache->get_shape() == h_out->shape());
-    h_out->CopyToCpu(h_cache->get_data().data());
-
-    std::unique_ptr<paddle_infer::Tensor> c_out =
-        predictor->GetOutputHandle(output_names[3]);
-    assert(c_cache->get_shape() == c_out->shape());
-    c_out->CopyToCpu(c_cache->get_data().data());
-
-    std::unique_ptr<paddle_infer::Tensor> output_tensor =
-        predictor->GetOutputHandle(output_names[0]);
-    std::vector<int> output_shape = output_tensor->shape();
-    int32 row = output_shape[1];
-    int32 col = output_shape[2];
-
-
-    // inferences->Resize(row * col);
-    // *inference_dim = col;
-    out->logprobs.Resize(row * col);
-    out->vocab_dim = col;
-    output_tensor->CopyToCpu(out->logprobs.Data());
-
-    ReleasePredictor(predictor);
-}
-
-}  // namespace ppspeech
--- a/speechx/speechx/asr/nnet/ds2_nnet.h
+++ b/speechx/speechx/asr/nnet/ds2_nnet.h
@ -1,97 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-#include <numeric>
-
-#include "base/common.h"
-#include "kaldi/matrix/kaldi-matrix.h"
-#include "nnet/nnet_itf.h"
-#include "paddle_inference_api.h"
-
-namespace ppspeech {
-
-
-template <typename T>
-class Tensor {
-  public:
-    Tensor() {}
-    explicit Tensor(const std::vector<int>& shape) : _shape(shape) {
-        int neml = std::accumulate(
-            _shape.begin(), _shape.end(), 1, std::multiplies<int>());
-        LOG(INFO) << "Tensor neml: " << neml;
-        _data.resize(neml, 0);
-    }
-
-    void reshape(const std::vector<int>& shape) {
-        _shape = shape;
-        int neml = std::accumulate(
-            _shape.begin(), _shape.end(), 1, std::multiplies<int>());
-        _data.resize(neml, 0);
-    }
-
-    const std::vector<int>& get_shape() const { return _shape; }
-    std::vector<T>& get_data() { return _data; }
-
-  private:
-    std::vector<int> _shape;
-    std::vector<T> _data;
-};
-
-class PaddleNnet : public NnetBase {
-  public:
-    explicit PaddleNnet(const ModelOptions& opts);
-
-    void FeedForward(const kaldi::Vector<kaldi::BaseFloat>& features,
-                     const int32& feature_dim,
-                     NnetOut* out) override;
-
-    void AttentionRescoring(const std::vector<std::vector<int>>& hyps,
-                            float reverse_weight,
-                            std::vector<float>* rescoring_score) override {
-        VLOG(2) << "deepspeech2 not has AttentionRescoring.";
-    }
-
-    void Dim();
-
-    void Reset() override;
-
-    bool IsLogProb() override { return false; }
-
-
-    std::shared_ptr<Tensor<kaldi::BaseFloat>> GetCacheEncoder(
-        const std::string& name);
-
-    void InitCacheEncouts(const ModelOptions& opts);
-
-    void EncoderOuts(std::vector<kaldi::Vector<kaldi::BaseFloat>>* encoder_out)
-        const override {}
-
-  private:
-    paddle_infer::Predictor* GetPredictor();
-    int ReleasePredictor(paddle_infer::Predictor* predictor);
-
-    std::unique_ptr<paddle_infer::services::PredictorPool> pool;
-    std::vector<bool> pool_usages;
-    std::mutex pool_mutex;
-    std::map<paddle_infer::Predictor*, int> predictor_to_thread_id;
-    std::map<std::string, int> cache_names_idx_;
-    std::vector<std::shared_ptr<Tensor<kaldi::BaseFloat>>> cache_encouts_;
-
-    ModelOptions opts_;
-
-  public:
-    DISALLOW_COPY_AND_ASSIGN(PaddleNnet);
-};
-
-}  // namespace ppspeech
--- a/speechx/speechx/asr/nnet/ds2_nnet_main.cc
+++ b/speechx/speechx/asr/nnet/ds2_nnet_main.cc
@ -1,142 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "base/common.h"
-#include "decoder/param.h"
-#include "frontend/audio/assembler.h"
-#include "frontend/audio/data_cache.h"
-#include "kaldi/util/table-types.h"
-#include "nnet/decodable.h"
-#include "nnet/ds2_nnet.h"
-
-DEFINE_string(feature_rspecifier, "", "test feature rspecifier");
-DEFINE_string(nnet_prob_wspecifier, "", "nnet porb wspecifier");
-
-using kaldi::BaseFloat;
-using kaldi::Matrix;
-using std::vector;
-
-int main(int argc, char* argv[]) {
-    gflags::SetUsageMessage("Usage:");
-    gflags::ParseCommandLineFlags(&argc, &argv, false);
-    google::InitGoogleLogging(argv[0]);
-    google::InstallFailureSignalHandler();
-    FLAGS_logtostderr = 1;
-
-    kaldi::SequentialBaseFloatMatrixReader feature_reader(
-        FLAGS_feature_rspecifier);
-    kaldi::BaseFloatMatrixWriter nnet_writer(FLAGS_nnet_prob_wspecifier);
-    std::string model_graph = FLAGS_model_path;
-    std::string model_params = FLAGS_param_path;
-    LOG(INFO) << "model path: " << model_graph;
-    LOG(INFO) << "model param: " << model_params;
-
-    int32 num_done = 0, num_err = 0;
-
-    ppspeech::ModelOptions model_opts = ppspeech::ModelOptions::InitFromFlags();
-
-    std::shared_ptr<ppspeech::PaddleNnet> nnet(
-        new ppspeech::PaddleNnet(model_opts));
-    std::shared_ptr<ppspeech::DataCache> raw_data(new ppspeech::DataCache());
-    std::shared_ptr<ppspeech::Decodable> decodable(
-        new ppspeech::Decodable(nnet, raw_data, FLAGS_acoustic_scale));
-
-    int32 chunk_size = FLAGS_receptive_field_length +
-                       (FLAGS_nnet_decoder_chunk - 1) * FLAGS_subsampling_rate;
-    int32 chunk_stride = FLAGS_subsampling_rate * FLAGS_nnet_decoder_chunk;
-    int32 receptive_field_length = FLAGS_receptive_field_length;
-    LOG(INFO) << "chunk size (frame): " << chunk_size;
-    LOG(INFO) << "chunk stride (frame): " << chunk_stride;
-    LOG(INFO) << "receptive field (frame): " << receptive_field_length;
-    kaldi::Timer timer;
-    for (; !feature_reader.Done(); feature_reader.Next()) {
-        string utt = feature_reader.Key();
-        kaldi::Matrix<BaseFloat> feature = feature_reader.Value();
-        raw_data->SetDim(feature.NumCols());
-        LOG(INFO) << "process utt: " << utt;
-        LOG(INFO) << "rows: " << feature.NumRows();
-        LOG(INFO) << "cols: " << feature.NumCols();
-
-        int32 row_idx = 0;
-        int32 padding_len = 0;
-        int32 ori_feature_len = feature.NumRows();
-        if ((feature.NumRows() - chunk_size) % chunk_stride != 0) {
-            padding_len =
-                chunk_stride - (feature.NumRows() - chunk_size) % chunk_stride;
-            feature.Resize(feature.NumRows() + padding_len,
-                           feature.NumCols(),
-                           kaldi::kCopyData);
-        }
-        int32 num_chunks = (feature.NumRows() - chunk_size) / chunk_stride + 1;
-        int32 frame_idx = 0;
-        std::vector<kaldi::Vector<kaldi::BaseFloat>> prob_vec;
-        for (int chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) {
-            kaldi::Vector<kaldi::BaseFloat> feature_chunk(chunk_size *
-                                                          feature.NumCols());
-            int32 feature_chunk_size = 0;
-            if (ori_feature_len > chunk_idx * chunk_stride) {
-                feature_chunk_size = std::min(
-                    ori_feature_len - chunk_idx * chunk_stride, chunk_size);
-            }
-            if (feature_chunk_size < receptive_field_length) break;
-
-            int32 start = chunk_idx * chunk_stride;
-            for (int row_id = 0; row_id < chunk_size; ++row_id) {
-                kaldi::SubVector<kaldi::BaseFloat> tmp(feature, start);
-                kaldi::SubVector<kaldi::BaseFloat> f_chunk_tmp(
-                    feature_chunk.Data() + row_id * feature.NumCols(),
-                    feature.NumCols());
-                f_chunk_tmp.CopyFromVec(tmp);
-                ++start;
-            }
-            raw_data->Accept(feature_chunk);
-            if (chunk_idx == num_chunks - 1) {
-                raw_data->SetFinished();
-            }
-            vector<kaldi::BaseFloat> prob;
-            while (decodable->FrameLikelihood(frame_idx, &prob)) {
-                kaldi::Vector<kaldi::BaseFloat> vec_tmp(prob.size());
-                std::memcpy(vec_tmp.Data(),
-                            prob.data(),
-                            sizeof(kaldi::BaseFloat) * prob.size());
-                prob_vec.push_back(vec_tmp);
-                frame_idx++;
-            }
-        }
-        decodable->Reset();
-        if (prob_vec.size() == 0) {
-            // the TokenWriter can not write empty string.
-            ++num_err;
-            KALDI_LOG << " the nnet prob of " << utt << " is empty";
-            continue;
-        }
-        kaldi::Matrix<kaldi::BaseFloat> result(prob_vec.size(),
-                                               prob_vec[0].Dim());
-        for (int row_idx = 0; row_idx < prob_vec.size(); ++row_idx) {
-            for (int32 col_idx = 0; col_idx < prob_vec[0].Dim(); ++col_idx) {
-                result(row_idx, col_idx) = prob_vec[row_idx](col_idx);
-            }
-        }
-
-        nnet_writer.Write(utt, result);
-        ++num_done;
-    }
-
-    double elapsed = timer.Elapsed();
-    KALDI_LOG << " cost:" << elapsed << " s";
-
-    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
-              << " with errors.";
-    return (num_done != 0 ? 0 : 1);
-}
--- a/speechx/speechx/asr/nnet/nnet_producer.cc
+++ b/speechx/speechx/asr/nnet/nnet_producer.cc
@ -65,7 +65,6 @@ bool NnetProducer::Compute() {
    size_t nframes = logprobs.Dim() / vocab_dim;
    VLOG(2) << "Forward out " << nframes << " decoder frames.";
    std::vector<BaseFloat> logprob(vocab_dim);
-    // remove later.
    for (size_t idx = 0; idx < nframes; ++idx) {
        for (size_t prob_idx = 0; prob_idx < vocab_dim; ++prob_idx) {
            logprob[prob_idx] = logprobs(idx * vocab_dim + prob_idx);
--- a/speechx/speechx/asr/recognizer/CMakeLists.txt
+++ b/speechx/speechx/asr/recognizer/CMakeLists.txt
@ -1,33 +1,12 @@
 set(srcs)

-if (USING_DS2)
-list(APPEND srcs
-recognizer.cc
-)
-endif()
-
-if (USING_U2)
 list(APPEND srcs
  u2_recognizer.cc
 )
-endif()

 add_library(recognizer STATIC ${srcs})
 target_link_libraries(recognizer PUBLIC decoder)

-# test
-if (USING_DS2)
-  set(BINS recognizer_main)
-
-  foreach(bin_name IN LISTS BINS)
-    add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
-    target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-    target_link_libraries(${bin_name} PUBLIC recognizer nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util ${DEPS})
-  endforeach()
-endif()
-
-
-if (USING_U2)
 set(TEST_BINS 
  u2_recognizer_main
  u2_recognizer_thread_main
@ -42,5 +21,3 @@ if (USING_U2)
  target_link_libraries(${bin_name}  ${PYTHON_LIBRARIES} ${PADDLE_LINK_FLAGS})
 endforeach()

-endif()
-
--- a/speechx/speechx/asr/recognizer/recognizer.cc
+++ b/speechx/speechx/asr/recognizer/recognizer.cc
@ -1,70 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "recognizer/recognizer.h"
-
-
-namespace ppspeech {
-
-using kaldi::BaseFloat;
-using kaldi::SubVector;
-using kaldi::Vector;
-using kaldi::VectorBase;
-using std::unique_ptr;
-using std::vector;
-
-
-Recognizer::Recognizer(const RecognizerResource& resource) {
-    // resource_ = resource;
-    const FeaturePipelineOptions& feature_opts = resource.feature_pipeline_opts;
-    feature_pipeline_.reset(new FeaturePipeline(feature_opts));
-
-    std::shared_ptr<PaddleNnet> nnet(new PaddleNnet(resource.model_opts));
-
-    BaseFloat ac_scale = resource.acoustic_scale;
-    decodable_.reset(new Decodable(nnet, feature_pipeline_, ac_scale));
-
-    decoder_.reset(new TLGDecoder(resource.tlg_opts));
-
-    input_finished_ = false;
-}
-
-void Recognizer::Accept(const Vector<BaseFloat>& waves) {
-    feature_pipeline_->Accept(waves);
-}
-
-void Recognizer::Decode() { decoder_->AdvanceDecode(decodable_); }
-
-std::string Recognizer::GetFinalResult() {
-    return decoder_->GetFinalBestPath();
-}
-
-std::string Recognizer::GetPartialResult() {
-    return decoder_->GetPartialResult();
-}
-
-void Recognizer::SetFinished() {
-    feature_pipeline_->SetFinished();
-    input_finished_ = true;
-}
-
-bool Recognizer::IsFinished() { return input_finished_; }
-
-void Recognizer::Reset() {
-    feature_pipeline_->Reset();
-    decodable_->Reset();
-    decoder_->Reset();
-}
-
-}  // namespace ppspeech
--- a/speechx/speechx/asr/recognizer/recognizer.h
+++ b/speechx/speechx/asr/recognizer/recognizer.h
@ -1,70 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// todo refactor later (SGoat)
-
-#pragma once
-
-#include "decoder/ctc_beam_search_decoder.h"
-#include "decoder/ctc_tlg_decoder.h"
-#include "frontend/audio/feature_pipeline.h"
-#include "nnet/decodable.h"
-#include "nnet/ds2_nnet.h"
-
-DECLARE_double(acoustic_scale);
-
-namespace ppspeech {
-
-struct RecognizerResource {
-    kaldi::BaseFloat acoustic_scale{1.0};
-    FeaturePipelineOptions feature_pipeline_opts{};
-    ModelOptions model_opts{};
-    TLGDecoderOptions tlg_opts{};
-    //    CTCBeamSearchOptions beam_search_opts;
-
-    static RecognizerResource InitFromFlags() {
-        RecognizerResource resource;
-        resource.acoustic_scale = FLAGS_acoustic_scale;
-        resource.feature_pipeline_opts =
-            FeaturePipelineOptions::InitFromFlags();
-        resource.feature_pipeline_opts.assembler_opts.fill_zero = true;
-        LOG(INFO) << "ds2 need fill zero be true: "
-                  << resource.feature_pipeline_opts.assembler_opts.fill_zero;
-        resource.model_opts = ModelOptions::InitFromFlags();
-        resource.tlg_opts = TLGDecoderOptions::InitFromFlags();
-        return resource;
-    }
-};
-
-class Recognizer {
-  public:
-    explicit Recognizer(const RecognizerResource& resouce);
-    void Accept(const kaldi::Vector<kaldi::BaseFloat>& waves);
-    void Decode();
-    std::string GetFinalResult();
-    std::string GetPartialResult();
-    void SetFinished();
-    bool IsFinished();
-    void Reset();
-
-  private:
-    // std::shared_ptr<RecognizerResource> resource_;
-    // RecognizerResource resource_;
-    std::shared_ptr<FeaturePipeline> feature_pipeline_;
-    std::shared_ptr<Decodable> decodable_;
-    std::unique_ptr<TLGDecoder> decoder_;
-    bool input_finished_;
-};
-
-}  // namespace ppspeech
--- a/speechx/speechx/asr/recognizer/recognizer_main.cc
+++ b/speechx/speechx/asr/recognizer/recognizer_main.cc
@ -1,105 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "decoder/param.h"
-#include "kaldi/feat/wave-reader.h"
-#include "kaldi/util/table-types.h"
-#include "recognizer/recognizer.h"
-
-DEFINE_string(wav_rspecifier, "", "test feature rspecifier");
-DEFINE_string(result_wspecifier, "", "test result wspecifier");
-DEFINE_double(streaming_chunk, 0.36, "streaming feature chunk size");
-DEFINE_int32(sample_rate, 16000, "sample rate");
-
-
-int main(int argc, char* argv[]) {
-    gflags::SetUsageMessage("Usage:");
-    gflags::ParseCommandLineFlags(&argc, &argv, false);
-    google::InitGoogleLogging(argv[0]);
-    google::InstallFailureSignalHandler();
-    FLAGS_logtostderr = 1;
-
-    ppspeech::RecognizerResource resource =
-        ppspeech::RecognizerResource::InitFromFlags();
-    ppspeech::Recognizer recognizer(resource);
-
-    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
-        FLAGS_wav_rspecifier);
-    kaldi::TokenWriter result_writer(FLAGS_result_wspecifier);
-
-    int sample_rate = FLAGS_sample_rate;
-    float streaming_chunk = FLAGS_streaming_chunk;
-    int chunk_sample_size = streaming_chunk * sample_rate;
-    LOG(INFO) << "sr: " << sample_rate;
-    LOG(INFO) << "chunk size (s): " << streaming_chunk;
-    LOG(INFO) << "chunk size (sample): " << chunk_sample_size;
-
-    int32 num_done = 0, num_err = 0;
-    double tot_wav_duration = 0.0;
-
-    kaldi::Timer timer;
-
-    for (; !wav_reader.Done(); wav_reader.Next()) {
-        std::string utt = wav_reader.Key();
-        const kaldi::WaveData& wave_data = wav_reader.Value();
-
-        int32 this_channel = 0;
-        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
-                                                    this_channel);
-        int tot_samples = waveform.Dim();
-        tot_wav_duration += tot_samples * 1.0 / sample_rate;
-        LOG(INFO) << "wav len (sample): " << tot_samples;
-
-        int sample_offset = 0;
-        std::vector<kaldi::Vector<BaseFloat>> feats;
-        int feature_rows = 0;
-        while (sample_offset < tot_samples) {
-            int cur_chunk_size =
-                std::min(chunk_sample_size, tot_samples - sample_offset);
-
-            kaldi::Vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
-            for (int i = 0; i < cur_chunk_size; ++i) {
-                wav_chunk(i) = waveform(sample_offset + i);
-            }
-            // wav_chunk = waveform.Range(sample_offset + i, cur_chunk_size);
-
-            recognizer.Accept(wav_chunk);
-            if (cur_chunk_size < chunk_sample_size) {
-                recognizer.SetFinished();
-            }
-            recognizer.Decode();
-
-            // no overlap
-            sample_offset += cur_chunk_size;
-        }
-
-        std::string result;
-        result = recognizer.GetFinalResult();
-        recognizer.Reset();
-        if (result.empty()) {
-            // the TokenWriter can not write empty string.
-            ++num_err;
-            KALDI_LOG << " the result of " << utt << " is empty";
-            continue;
-        }
-        KALDI_LOG << " the result of " << utt << " is " << result;
-        result_writer.Write(utt, result);
-        ++num_done;
-    }
-    double elapsed = timer.Elapsed();
-    KALDI_LOG << "Done " << num_done << " out of " << (num_err + num_done);
-    KALDI_LOG << " cost:" << elapsed << " s";
-    KALDI_LOG << "total wav duration is: " << tot_wav_duration << " s";
-    KALDI_LOG << "the RTF is: " << elapsed / tot_wav_duration;
-}
--- a/speechx/speechx/common/frontend/audio/cmvn_json2kaldi_main.cc
+++ b/speechx/speechx/common/frontend/audio/cmvn_json2kaldi_main.cc
@ -20,15 +20,12 @@
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/kaldi-io.h"
 #include "utils/file_utils.h"
-// #include "boost/json.hpp"
-#include <boost/json/src.hpp>
+#include "utils/picojson.h"

 DEFINE_string(json_file, "", "cmvn json file");
 DEFINE_string(cmvn_write_path, "./cmvn.ark", "write cmvn");
 DEFINE_bool(binary, true, "write cmvn in binary (true) or text(false)");

-using namespace boost::json;  // from <boost/json.hpp>
-
 int main(int argc, char* argv[]) {
    gflags::SetUsageMessage("Usage:");
    gflags::ParseCommandLineFlags(&argc, &argv, false);
@ -40,36 +37,49 @@ int main(int argc, char* argv[]) {

    auto ifs = std::ifstream(FLAGS_json_file);
    std::string json_str = ppspeech::ReadFile2String(FLAGS_json_file);
-    auto value = boost::json::parse(json_str);
-    if (!value.is_object()) {
+    picojson::value value;
+    std::string err;
+    const char* json_end = picojson::parse(
+        value, json_str.c_str(), json_str.c_str() + json_str.size(), &err);
+    if (!value.is<picojson::object>()) {
        LOG(ERROR) << "Input json file format error.";
    }

-    for (auto obj : value.as_object()) {
-        if (obj.key() == "mean_stat") {
-            VLOG(2) << "mean_stat:" << obj.value();
+    const picojson::value::object& obj = value.get<picojson::object>();
+    for (picojson::value::object::const_iterator elem = obj.begin();
+         elem != obj.end();
+         ++elem) {
+        if (elem->first == "mean_stat") {
+            VLOG(2) << "mean_stat:" << elem->second;
+            // const picojson::value tmp =
+            // elem->second.get(0);//<picojson::array>();
+            double tmp =
+                elem->second.get(0).get<double>();  //<picojson::array>();
+            VLOG(2) << "tmp: " << tmp;
        }
-        if (obj.key() == "var_stat") {
-            VLOG(2) << "var_stat: " << obj.value();
+        if (elem->first == "var_stat") {
+            VLOG(2) << "var_stat: " << elem->second;
        }
-        if (obj.key() == "frame_num") {
-            VLOG(2) << "frame_num: " << obj.value();
+        if (elem->first == "frame_num") {
+            VLOG(2) << "frame_num: " << elem->second;
        }
    }

-    boost::json::array mean_stat = value.at("mean_stat").as_array();
+    const picojson::value::array& mean_stat =
+        value.get("mean_stat").get<picojson::array>();
    std::vector<kaldi::BaseFloat> mean_stat_vec;
    for (auto it = mean_stat.begin(); it != mean_stat.end(); it++) {
-        mean_stat_vec.push_back(it->as_double());
+        mean_stat_vec.push_back((*it).get<double>());
    }

-    boost::json::array var_stat = value.at("var_stat").as_array();
+    const picojson::value::array& var_stat =
+        value.get("var_stat").get<picojson::array>();
    std::vector<kaldi::BaseFloat> var_stat_vec;
    for (auto it = var_stat.begin(); it != var_stat.end(); it++) {
-        var_stat_vec.push_back(it->as_double());
+        var_stat_vec.push_back((*it).get<double>());
    }

-    kaldi::int32 frame_num = uint64_t(value.at("frame_num").as_int64());
+    kaldi::int32 frame_num = value.get("frame_num").get<int64_t>();
    LOG(INFO) << "nframe: " << frame_num;

    size_t mean_size = mean_stat_vec.size();
--- a/speechx/speechx/common/utils/picojson.h
+++ b/speechx/speechx/common/utils/picojson.h