[speechx]Speechx directory refactor (#2746)

* refactor directory
3 years ago · 869f4267d5
parent 7804c8e8f3
commit 869f4267d5
100 changed files with 1821 additions and 53 deletions
--- a/speechx/requirement.txt
+++ b/speechx/requirement.txt
@ -1 +0,0 @@
-paddlepaddle>=2.4rc
--- a/speechx/speechx/CMakeLists.txt
+++ b/speechx/speechx/CMakeLists.txt
@ -2,50 +2,11 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

 project(speechx LANGUAGES CXX)

-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/kaldi
-)
-add_subdirectory(kaldi)
-
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/utils
-)
-add_subdirectory(utils)
-
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/frontend
-)
-add_subdirectory(frontend)
-
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/nnet
-)
-add_subdirectory(nnet)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kaldi)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common)

-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/decoder
-)
-add_subdirectory(decoder)
-
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/recognizer
-)
-add_subdirectory(recognizer)
-
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/protocol
-)
-add_subdirectory(protocol)
-
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/codelab
-)
+add_subdirectory(asr)
+add_subdirectory(common)
+add_subdirectory(kaldi)
 add_subdirectory(codelab)
--- a/speechx/speechx/asr/CMakeLists.txt
+++ b/speechx/speechx/asr/CMakeLists.txt
@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
+# Build entry point for the ASR part of speechx.
+project(ASR LANGUAGES CXX)
+# Put this directory and its server/ subdirectory on the include path.
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/server)
+# Descend into each ASR component directory.
+add_subdirectory(decoder)
+add_subdirectory(recognizer)
+add_subdirectory(nnet)
+add_subdirectory(server)
--- a/speechx/speechx/asr/decoder/CMakeLists.txt
+++ b/speechx/speechx/asr/decoder/CMakeLists.txt
--- a/speechx/speechx/asr/decoder/common.h
+++ b/speechx/speechx/asr/decoder/common.h
--- a/speechx/speechx/asr/decoder/ctc_beam_search_decoder.cc
+++ b/speechx/speechx/asr/decoder/ctc_beam_search_decoder.cc
--- a/speechx/speechx/asr/decoder/ctc_beam_search_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_beam_search_decoder.h
--- a/speechx/speechx/asr/decoder/ctc_beam_search_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/ctc_beam_search_decoder_main.cc
--- a/speechx/speechx/asr/decoder/ctc_beam_search_opt.h
+++ b/speechx/speechx/asr/decoder/ctc_beam_search_opt.h
--- a/speechx/speechx/asr/decoder/ctc_decoders/.gitignore
+++ b/speechx/speechx/asr/decoder/ctc_decoders/.gitignore
@ -0,0 +1,9 @@
+ThreadPool/
+build/
+dist/
+kenlm/
+openfst-1.6.3/
+openfst-1.6.3.tar.gz
+swig_decoders.egg-info/
+decoders_wrap.cxx
+swig_decoders.py
--- a/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp
@ -0,0 +1,607 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ctc_beam_search_decoder.h"
+
+#include <algorithm>
+#include <cmath>
+#include <functional>
+#include <iostream>
+#include <limits>
+#include <map>
+#include <utility>
+
+#include "ThreadPool.h"
+#include "fst/fstlib.h"
+
+#include "decoder_utils.h"
+#include "path_trie.h"
+
+using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;
+
+
+// Decode a single utterance with CTC prefix beam search.
+//
+// Parameters:
+//     probs_seq: one probability vector per time step; every vector must
+//                have exactly vocabulary.size() entries.
+//     vocabulary: output labels. The entry equal to kSPACE (if present) is
+//                 treated as the word boundary for word-based scoring.
+//     beam_size: number of prefixes kept after each time step. Must be
+//                greater than 0.
+//     cutoff_prob, cutoff_top_n: forwarded to get_pruned_log_probs() to prune
+//                the candidate labels of each time step.
+//     ext_scorer: optional external scorer; nullptr decodes without it.
+//     blank_id: index of the CTC blank label inside each probability vector.
+// Return:
+//     Whatever get_beam_search_result() produces for the surviving prefixes.
+std::vector<std::pair<double, std::string>> ctc_beam_search_decoding(
+    const std::vector<std::vector<double>> &probs_seq,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    double cutoff_prob,
+    size_t cutoff_top_n,
+    Scorer *ext_scorer,
+    size_t blank_id) {
+    // A zero-width beam would make `prefixes[num_prefixes - 1]` below index
+    // with an underflowed size_t, so reject it up front (same check and
+    // message as the CtcBeamSearchDecoderBatch constructor).
+    VALID_CHECK_GT(beam_size, 0, "beam_size must be greater than 0!");
+
+    // dimension check
+    size_t num_time_steps = probs_seq.size();
+    for (size_t i = 0; i < num_time_steps; ++i) {
+        VALID_CHECK_EQ(probs_seq[i].size(),
+                       // vocabulary.size() + 1,
+                       vocabulary.size(),
+                       "The shape of probs_seq does not match with "
+                       "the shape of the vocabulary");
+    }
+
+
+    // assign space id
+    auto it = std::find(vocabulary.begin(), vocabulary.end(), kSPACE);
+    int space_id = it - vocabulary.begin();
+    // if no space in vocabulary, use a sentinel no label index can equal
+    if ((size_t)space_id >= vocabulary.size()) {
+        space_id = -2;
+    }
+    // init prefixes' root
+    PathTrie root;
+    root.score = root.log_prob_b_prev = 0.0;
+    std::vector<PathTrie *> prefixes;
+    prefixes.push_back(&root);
+
+    // word-based scorer: give the trie its own copy of the scorer's
+    // dictionary FST plus a matcher over that copy
+    if (ext_scorer != nullptr && !ext_scorer->is_character_based()) {
+        auto fst_dict =
+            static_cast<fst::StdVectorFst *>(ext_scorer->dictionary);
+        fst::StdVectorFst *dict_ptr = fst_dict->Copy(true);
+        root.set_dictionary(dict_ptr);
+        auto matcher = std::make_shared<FSTMATCH>(*dict_ptr, fst::MATCH_INPUT);
+        root.set_matcher(matcher);
+    }
+
+    // prefix search over time
+    for (size_t time_step = 0; time_step < num_time_steps; ++time_step) {
+        auto &prob = probs_seq[time_step];
+
+        // With a scorer, derive a score threshold from the weakest prefix in
+        // the beam; it is only applied once the beam is full.
+        float min_cutoff = -NUM_FLT_INF;
+        bool full_beam = false;
+        if (ext_scorer != nullptr) {
+            size_t num_prefixes = std::min(prefixes.size(), beam_size);
+            std::sort(prefixes.begin(),
+                      prefixes.begin() + num_prefixes,
+                      prefix_compare);
+            min_cutoff = prefixes[num_prefixes - 1]->score +
+                         std::log(prob[blank_id]) -
+                         std::max(0.0, ext_scorer->beta);
+            full_beam = (num_prefixes == beam_size);
+        }
+
+        std::vector<std::pair<size_t, float>> log_prob_idx =
+            get_pruned_log_probs(prob, cutoff_prob, cutoff_top_n);
+        // loop over chars
+        for (size_t index = 0; index < log_prob_idx.size(); index++) {
+            auto c = log_prob_idx[index].first;
+            auto log_prob_c = log_prob_idx[index].second;
+
+            for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) {
+                auto prefix = prefixes[i];
+                if (full_beam && log_prob_c + prefix->score < min_cutoff) {
+                    break;
+                }
+                // blank
+                if (c == blank_id) {
+                    prefix->log_prob_b_cur = log_sum_exp(
+                        prefix->log_prob_b_cur, log_prob_c + prefix->score);
+                    continue;
+                }
+                // repeated character
+                if (c == prefix->character) {
+                    prefix->log_prob_nb_cur =
+                        log_sum_exp(prefix->log_prob_nb_cur,
+                                    log_prob_c + prefix->log_prob_nb_prev);
+                }
+                // get new prefix
+                auto prefix_new = prefix->get_path_trie(c);
+
+                if (prefix_new != nullptr) {
+                    float log_p = -NUM_FLT_INF;
+
+                    // a repeated label only extends the prefix when the
+                    // previous step could have ended in blank
+                    if (c == prefix->character &&
+                        prefix->log_prob_b_prev > -NUM_FLT_INF) {
+                        log_p = log_prob_c + prefix->log_prob_b_prev;
+                    } else if (c != prefix->character) {
+                        log_p = log_prob_c + prefix->score;
+                    }
+
+                    // language model scoring
+                    if (ext_scorer != nullptr &&
+                        (c == space_id || ext_scorer->is_character_based())) {
+                        PathTrie *prefix_to_score = nullptr;
+                        // skip scoring the space
+                        if (ext_scorer->is_character_based()) {
+                            prefix_to_score = prefix_new;
+                        } else {
+                            prefix_to_score = prefix;
+                        }
+
+                        float score = 0.0;
+                        std::vector<std::string> ngram;
+                        ngram = ext_scorer->make_ngram(prefix_to_score);
+                        score = ext_scorer->get_log_cond_prob(ngram) *
+                                ext_scorer->alpha;
+                        log_p += score;
+                        log_p += ext_scorer->beta;
+                    }
+                    prefix_new->log_prob_nb_cur =
+                        log_sum_exp(prefix_new->log_prob_nb_cur, log_p);
+                }
+            }  // end of loop over prefix
+        }      // end of loop over vocabulary
+
+
+        prefixes.clear();
+        // update log probs
+        root.iterate_to_vec(prefixes);
+
+        // only preserve top beam_size prefixes
+        if (prefixes.size() >= beam_size) {
+            std::nth_element(prefixes.begin(),
+                             prefixes.begin() + beam_size,
+                             prefixes.end(),
+                             prefix_compare);
+            for (size_t i = beam_size; i < prefixes.size(); ++i) {
+                prefixes[i]->remove();
+            }
+        }
+    }  // end of loop over time
+
+    // score the last word of each prefix that doesn't end with space
+    if (ext_scorer != nullptr && !ext_scorer->is_character_based()) {
+        for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
+            auto prefix = prefixes[i];
+            if (!prefix->is_empty() && prefix->character != space_id) {
+                float score = 0.0;
+                std::vector<std::string> ngram = ext_scorer->make_ngram(prefix);
+                score =
+                    ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha;
+                score += ext_scorer->beta;
+                prefix->score += score;
+            }
+        }
+    }
+
+    size_t num_prefixes = std::min(prefixes.size(), beam_size);
+    std::sort(
+        prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare);
+
+    // compute approximate ctc score as the return score, without affecting the
+    // return order of decoding result. To delete when decoder gets stable.
+    for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
+        double approx_ctc = prefixes[i]->score;
+        if (ext_scorer != nullptr) {
+            std::vector<int> output;
+            prefixes[i]->get_path_vec(output);
+            auto prefix_length = output.size();
+            auto words = ext_scorer->split_labels(output);
+            // remove word insert
+            approx_ctc = approx_ctc - prefix_length * ext_scorer->beta;
+            // remove language model weight:
+            approx_ctc -=
+                (ext_scorer->get_sent_log_prob(words)) * ext_scorer->alpha;
+        }
+        prefixes[i]->approx_ctc = approx_ctc;
+    }
+
+    return get_beam_search_result(prefixes, vocabulary, beam_size);
+}
+
+
+// Decode a batch of utterances, one ctc_beam_search_decoding() task per
+// entry of probs_split, on a ThreadPool with num_processes workers.
+//
+// num_processes must be greater than 0. The remaining arguments are passed
+// unchanged to every ctc_beam_search_decoding() call. Results are returned
+// in the same order as probs_split.
+std::vector<std::vector<std::pair<double, std::string>>>
+ctc_beam_search_decoding_batch(
+    const std::vector<std::vector<std::vector<double>>> &probs_split,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    size_t num_processes,
+    double cutoff_prob,
+    size_t cutoff_top_n,
+    Scorer *ext_scorer,
+    size_t blank_id) {
+    VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!");
+    // thread pool
+    ThreadPool pool(num_processes);
+    // number of samples
+    const size_t batch_size = probs_split.size();
+
+    // enqueue the tasks of decoding
+    std::vector<std::future<std::vector<std::pair<double, std::string>>>> res;
+    res.reserve(batch_size);
+    for (size_t i = 0; i < batch_size; ++i) {
+        // Hand the inputs over by reference instead of deep-copying the
+        // probabilities and the whole vocabulary for every task. Both are
+        // owned by the caller and every future is waited on below, so the
+        // references cannot dangle.
+        res.emplace_back(pool.enqueue(ctc_beam_search_decoding,
+                                      std::cref(probs_split[i]),
+                                      std::cref(vocabulary),
+                                      beam_size,
+                                      cutoff_prob,
+                                      cutoff_top_n,
+                                      ext_scorer,
+                                      blank_id));
+    }
+
+    // get decoding results
+    std::vector<std::vector<std::pair<double, std::string>>> batch_results;
+    batch_results.reserve(batch_size);
+    for (auto &task : res) {
+        batch_results.emplace_back(task.get());
+    }
+    return batch_results;
+}
+
+// Prepare `root` for chunked decoding. When a word-based external scorer is
+// supplied, the trie gets its own copy of the scorer's dictionary FST and a
+// matcher over that copy; otherwise `root` is left untouched.
+void ctc_beam_search_decode_chunk_begin(PathTrie *root, Scorer *ext_scorer) {
+    // no scorer, or a character-based one: nothing to attach
+    if (ext_scorer == nullptr || ext_scorer->is_character_based()) {
+        return;
+    }
+
+    auto *scorer_fst = static_cast<fst::StdVectorFst *>(ext_scorer->dictionary);
+    fst::StdVectorFst *dict_copy = scorer_fst->Copy(true);
+    root->set_dictionary(dict_copy);
+    root->set_matcher(
+        std::make_shared<FSTMATCH>(*dict_copy, fst::MATCH_INPUT));
+}
+
+// Advance an in-progress CTC prefix beam search by one chunk of time steps.
+//
+// Parameters:
+//     root: root of the prefix trie that persists across chunks.
+//     prefixes: current beam; read on entry and rewritten after every time
+//               step with the prefixes collected from `root`.
+//     probs_seq: one probability vector per time step of this chunk; every
+//                vector must have exactly vocabulary.size() entries.
+//     vocabulary: output labels; the entry equal to kSPACE (if present) is
+//                 the word boundary for word-based scoring.
+//     beam_size: number of prefixes kept after each time step. Must be
+//                greater than 0.
+//     cutoff_prob, cutoff_top_n: forwarded to get_pruned_log_probs().
+//     ext_scorer: optional external scorer; nullptr decodes without it.
+//     blank_id: index of the CTC blank label inside each probability vector.
+void ctc_beam_search_decode_chunk(
+    PathTrie *root,
+    std::vector<PathTrie *> &prefixes,
+    const std::vector<std::vector<double>> &probs_seq,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    double cutoff_prob,
+    size_t cutoff_top_n,
+    Scorer *ext_scorer,
+    size_t blank_id) {
+    // A zero-width beam would make `prefixes[num_prefixes - 1]` below index
+    // with an underflowed size_t, so reject it up front (same check and
+    // message as the CtcBeamSearchDecoderBatch constructor).
+    VALID_CHECK_GT(beam_size, 0, "beam_size must be greater than 0!");
+
+    // dimension check
+    size_t num_time_steps = probs_seq.size();
+    for (size_t i = 0; i < num_time_steps; ++i) {
+        VALID_CHECK_EQ(probs_seq[i].size(),
+                       // vocabulary.size() + 1,
+                       vocabulary.size(),
+                       "The shape of probs_seq does not match with "
+                       "the shape of the vocabulary");
+    }
+
+    // assign space id
+    auto it = std::find(vocabulary.begin(), vocabulary.end(), kSPACE);
+    int space_id = it - vocabulary.begin();
+    // if no space in vocabulary, use a sentinel no label index can equal
+    if ((size_t)space_id >= vocabulary.size()) {
+        space_id = -2;
+    }
+
+    // prefix search over time
+    for (size_t time_step = 0; time_step < num_time_steps; ++time_step) {
+        auto &prob = probs_seq[time_step];
+
+        // With a scorer, derive a score threshold from the weakest prefix in
+        // the beam; it is only applied once the beam is full.
+        float min_cutoff = -NUM_FLT_INF;
+        bool full_beam = false;
+        if (ext_scorer != nullptr) {
+            size_t num_prefixes = std::min(prefixes.size(), beam_size);
+            std::sort(prefixes.begin(),
+                      prefixes.begin() + num_prefixes,
+                      prefix_compare);
+            min_cutoff = prefixes[num_prefixes - 1]->score +
+                         std::log(prob[blank_id]) -
+                         std::max(0.0, ext_scorer->beta);
+            full_beam = (num_prefixes == beam_size);
+        }
+
+        std::vector<std::pair<size_t, float>> log_prob_idx =
+            get_pruned_log_probs(prob, cutoff_prob, cutoff_top_n);
+        // loop over chars
+        for (size_t index = 0; index < log_prob_idx.size(); index++) {
+            auto c = log_prob_idx[index].first;
+            auto log_prob_c = log_prob_idx[index].second;
+
+            for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) {
+                auto prefix = prefixes[i];
+                if (full_beam && log_prob_c + prefix->score < min_cutoff) {
+                    break;
+                }
+                // blank
+                if (c == blank_id) {
+                    prefix->log_prob_b_cur = log_sum_exp(
+                        prefix->log_prob_b_cur, log_prob_c + prefix->score);
+                    continue;
+                }
+                // repeated character
+                if (c == prefix->character) {
+                    prefix->log_prob_nb_cur =
+                        log_sum_exp(prefix->log_prob_nb_cur,
+                                    log_prob_c + prefix->log_prob_nb_prev);
+                }
+                // get new prefix
+                auto prefix_new = prefix->get_path_trie(c);
+
+                if (prefix_new != nullptr) {
+                    float log_p = -NUM_FLT_INF;
+
+                    // a repeated label only extends the prefix when the
+                    // previous step could have ended in blank
+                    if (c == prefix->character &&
+                        prefix->log_prob_b_prev > -NUM_FLT_INF) {
+                        log_p = log_prob_c + prefix->log_prob_b_prev;
+                    } else if (c != prefix->character) {
+                        log_p = log_prob_c + prefix->score;
+                    }
+
+                    // language model scoring
+                    if (ext_scorer != nullptr &&
+                        (c == space_id || ext_scorer->is_character_based())) {
+                        PathTrie *prefix_to_score = nullptr;
+                        // skip scoring the space
+                        if (ext_scorer->is_character_based()) {
+                            prefix_to_score = prefix_new;
+                        } else {
+                            prefix_to_score = prefix;
+                        }
+
+                        float score = 0.0;
+                        std::vector<std::string> ngram;
+                        ngram = ext_scorer->make_ngram(prefix_to_score);
+                        score = ext_scorer->get_log_cond_prob(ngram) *
+                                ext_scorer->alpha;
+                        log_p += score;
+                        log_p += ext_scorer->beta;
+                    }
+                    prefix_new->log_prob_nb_cur =
+                        log_sum_exp(prefix_new->log_prob_nb_cur, log_p);
+                }
+            }  // end of loop over prefix
+        }      // end of loop over vocabulary
+
+        prefixes.clear();
+        // update log probs
+
+        root->iterate_to_vec(prefixes);
+
+        // only preserve top beam_size prefixes
+        if (prefixes.size() >= beam_size) {
+            std::nth_element(prefixes.begin(),
+                             prefixes.begin() + beam_size,
+                             prefixes.end(),
+                             prefix_compare);
+            for (size_t i = beam_size; i < prefixes.size(); ++i) {
+                prefixes[i]->remove();
+            }
+        }
+    }  // end of loop over time
+}
+
+
+// Produce the current decoding result for a chunked beam search without
+// disturbing the search state.
+//
+// With a word-based scorer, the unfinished last word of each of the top
+// prefixes is scored temporarily, the result is built, and the same score is
+// subtracted again so that decoding can continue with further chunks.
+std::vector<std::pair<double, std::string>> get_decode_result(
+    std::vector<PathTrie *> &prefixes,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    Scorer *ext_scorer) {
+    // locate the space label; -2 stands for "no space in vocabulary"
+    const auto space_pos =
+        std::find(vocabulary.begin(), vocabulary.end(), kSPACE);
+    int space_id = space_pos - vocabulary.begin();
+    if ((size_t)space_id >= vocabulary.size()) {
+        space_id = -2;
+    }
+
+    const bool word_based_lm =
+        ext_scorer != nullptr && !ext_scorer->is_character_based();
+
+    // true for a non-empty prefix whose last label is not the space
+    auto ends_mid_word = [space_id](PathTrie *candidate) {
+        return !candidate->is_empty() && candidate->character != space_id;
+    };
+    // weighted LM score of the last word plus the word insertion term
+    auto last_word_score = [ext_scorer](PathTrie *candidate) -> float {
+        std::vector<std::string> ngram = ext_scorer->make_ngram(candidate);
+        float bonus =
+            ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha;
+        bonus += ext_scorer->beta;
+        return bonus;
+    };
+
+    const size_t num_prefixes = std::min(prefixes.size(), beam_size);
+
+    // score the last word of each prefix that doesn't end with space
+    if (word_based_lm) {
+        for (size_t k = 0; k < num_prefixes; ++k) {
+            PathTrie *candidate = prefixes[k];
+            if (ends_mid_word(candidate)) {
+                candidate->score += last_word_score(candidate);
+            }
+        }
+    }
+
+    std::sort(
+        prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare);
+
+    // compute approximate ctc score as the return score, without affecting
+    // the return order of decoding result. To delete when decoder gets
+    // stable.
+    for (size_t k = 0; k < num_prefixes; ++k) {
+        PathTrie *candidate = prefixes[k];
+        double approx_ctc = candidate->score;
+        if (ext_scorer != nullptr) {
+            std::vector<int> labels;
+            candidate->get_path_vec(labels);
+            auto prefix_length = labels.size();
+            auto words = ext_scorer->split_labels(labels);
+            // remove word insert
+            approx_ctc = approx_ctc - prefix_length * ext_scorer->beta;
+            // remove language model weight:
+            approx_ctc -=
+                (ext_scorer->get_sent_log_prob(words)) * ext_scorer->alpha;
+        }
+        candidate->approx_ctc = approx_ctc;
+    }
+
+    std::vector<std::pair<double, std::string>> res =
+        get_beam_search_result(prefixes, vocabulary, beam_size);
+
+    // pay back the last word of each prefix that doesn't end with space (for
+    // decoding by chunk)
+    if (word_based_lm) {
+        const size_t payback_count = std::min(prefixes.size(), beam_size);
+        for (size_t k = 0; k < payback_count; ++k) {
+            PathTrie *candidate = prefixes[k];
+            if (ends_mid_word(candidate)) {
+                candidate->score -= last_word_score(candidate);
+            }
+        }
+    }
+    return res;
+}
+
+
+// Destroy the storage object held by `storage` and leave the pointer empty.
+void free_storage(std::unique_ptr<CtcBeamSearchDecoderStorage> &storage) {
+    storage.reset();
+}
+
+
+// Nothing to release by hand: the members clean up after themselves.
+CtcBeamSearchDecoderBatch::~CtcBeamSearchDecoderBatch() = default;
+
+// Store the decoding configuration and create one initialised storage
+// (trie root + beam) per batch entry. beam_size and num_processes must both
+// be greater than 0.
+CtcBeamSearchDecoderBatch::CtcBeamSearchDecoderBatch(
+    const std::vector<std::string> &vocabulary,
+    size_t batch_size,
+    size_t beam_size,
+    size_t num_processes,
+    double cutoff_prob,
+    size_t cutoff_top_n,
+    Scorer *ext_scorer,
+    size_t blank_id)
+    : vocabulary(vocabulary),
+      batch_size(batch_size),
+      beam_size(beam_size),
+      num_processes(num_processes),
+      cutoff_prob(cutoff_prob),
+      cutoff_top_n(cutoff_top_n),
+      ext_scorer(ext_scorer),
+      blank_id(blank_id) {
+    VALID_CHECK_GT(this->beam_size, 0, "beam_size must be greater than 0!");
+    VALID_CHECK_GT(
+        this->num_processes, 0, "num_processes must be nonnegative!");
+
+    for (size_t slot = 0; slot < this->batch_size; ++slot) {
+        std::unique_ptr<CtcBeamSearchDecoderStorage> storage(
+            new CtcBeamSearchDecoderStorage());
+        // attach the scorer's dictionary to the fresh root where applicable
+        ctc_beam_search_decode_chunk_begin(storage->root, this->ext_scorer);
+        this->decoder_storage_vector.push_back(std::move(storage));
+    }
+}
+
+/**
+ * Feed the next chunk of probabilities to every stream of the batch.
+ *
+ * Input
+ * probs_split: shape [B, T, D]; B must equal the batch size of this decoder.
+ * has_value: per-stream flag; only streams whose entry equals "true" are
+ *            decoded in this call. Streams without an entry are skipped.
+ */
+void CtcBeamSearchDecoderBatch::next(
+    const std::vector<std::vector<std::vector<double>>> &probs_split,
+    const std::vector<std::string> &has_value) {
+    VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!");
+    // number of samples
+    size_t probs_num = probs_split.size();
+    VALID_CHECK_EQ(this->batch_size,
+                   probs_num,
+                   "The batch size of the current input data should be same "
+                   "with the input data before");
+
+    // Collect the streams that carry data. Bounding by has_value.size()
+    // avoids reading past the end of a short flag vector.
+    std::vector<size_t> active;
+    for (size_t i = 0; i < batch_size && i < has_value.size(); ++i) {
+        if (has_value[i] == "true") active.push_back(i);
+    }
+    if (active.empty()) {
+        return;
+    }
+
+    // thread pool
+    ThreadPool pool(std::min(num_processes, active.size()));
+
+    // enqueue the tasks of decoding
+    std::vector<std::future<void>> res;
+    res.reserve(active.size());
+    for (size_t i : active) {
+        // Inputs are passed by reference: they outlive the tasks because
+        // every future is waited on before this function returns.
+        res.emplace_back(pool.enqueue(
+            ctc_beam_search_decode_chunk,
+            this->decoder_storage_vector[i]->root,
+            std::ref(this->decoder_storage_vector[i]->prefixes),
+            std::cref(probs_split[i]),
+            std::cref(this->vocabulary),
+            this->beam_size,
+            this->cutoff_prob,
+            this->cutoff_top_n,
+            this->ext_scorer,
+            this->blank_id));
+    }
+
+    // Wait for exactly the tasks that were enqueued. There is one future per
+    // active stream, not one per batch entry.
+    for (auto &task : res) {
+        task.get();
+    }
+}
+
+/**
+ * Collect the current decoding result of every stream by running
+ * get_decode_result() for each batch entry on a thread pool.
+ *
+ * Return
+ * batch_result: shape[B, beam_size,(-approx_ctc score, string)]
+ */
+std::vector<std::vector<std::pair<double, std::string>>>
+CtcBeamSearchDecoderBatch::decode() {
+    VALID_CHECK_GT(
+        this->num_processes, 0, "num_processes must be nonnegative!");
+    // thread pool
+    ThreadPool pool(this->num_processes);
+
+    // enqueue the tasks of decoding
+    std::vector<std::future<std::vector<std::pair<double, std::string>>>> res;
+    res.reserve(this->batch_size);
+    for (size_t i = 0; i < this->batch_size; ++i) {
+        // The vocabulary is a member that outlives the tasks (all futures
+        // are drained below), so share it by reference instead of copying
+        // it once per batch entry.
+        res.emplace_back(
+            pool.enqueue(get_decode_result,
+                         std::ref(this->decoder_storage_vector[i]->prefixes),
+                         std::cref(this->vocabulary),
+                         this->beam_size,
+                         this->ext_scorer));
+    }
+
+    // get decoding results
+    std::vector<std::vector<std::pair<double, std::string>>> batch_results;
+    batch_results.reserve(res.size());
+    for (auto &task : res) {
+        batch_results.emplace_back(task.get());
+    }
+    return batch_results;
+}
+
+
+/**
+ * Reset the state of CtcBeamSearchDecoderBatch: adopt the new configuration,
+ * free every existing storage on a thread pool, and create a fresh,
+ * initialised storage for each entry of the new batch size.
+ */
+void CtcBeamSearchDecoderBatch::reset_state(size_t batch_size,
+                                            size_t beam_size,
+                                            size_t num_processes,
+                                            double cutoff_prob,
+                                            size_t cutoff_top_n) {
+    // take over the new configuration first; the checks below read it
+    this->batch_size = batch_size;
+    this->beam_size = beam_size;
+    this->num_processes = num_processes;
+    this->cutoff_prob = cutoff_prob;
+    this->cutoff_top_n = cutoff_top_n;
+
+    VALID_CHECK_GT(this->beam_size, 0, "beam_size must be greater than 0!");
+    VALID_CHECK_GT(
+        this->num_processes, 0, "num_processes must be nonnegative!");
+
+    // release the old storages in parallel
+    ThreadPool pool(this->num_processes);
+    std::vector<std::future<void>> pending;
+    for (auto &old_storage : decoder_storage_vector) {
+        pending.emplace_back(
+            pool.enqueue(free_storage, std::ref(old_storage)));
+    }
+    for (auto &task : pending) {
+        task.get();
+    }
+
+    // drop the now-empty slots by swapping with an empty vector
+    std::vector<std::unique_ptr<CtcBeamSearchDecoderStorage>>().swap(
+        decoder_storage_vector);
+
+    // rebuild one storage per stream of the new batch
+    while (this->decoder_storage_vector.size() < this->batch_size) {
+        this->decoder_storage_vector.push_back(
+            std::unique_ptr<CtcBeamSearchDecoderStorage>(
+                new CtcBeamSearchDecoderStorage()));
+        ctc_beam_search_decode_chunk_begin(
+            this->decoder_storage_vector.back()->root, this->ext_scorer);
+    }
+}
--- a/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h
@ -0,0 +1,175 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CTC_BEAM_SEARCH_DECODER_H_
+#define CTC_BEAM_SEARCH_DECODER_H_
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "scorer.h"
+
+/* CTC Beam Search Decoder
+
+ * Parameters:
+ *     probs_seq: 2-D vector that each element is a vector of probabilities
+ *               over vocabulary of one time step.
+ *     vocabulary: A vector of vocabulary.
+ *     beam_size: The width of beam search.
+ *     cutoff_prob: Cutoff probability for pruning.
+ *     cutoff_top_n: Cutoff number for pruning.
+ *     ext_scorer: External scorer to evaluate a prefix, which consists of
+ *                 n-gram language model scoring and word insertion term.
+ *                 Default null, decoding the input sample without scorer.
+ *     blank_id: Index of the CTC blank label within each probability
+ *               vector. Default 0.
+ * Return:
+ *     A vector that each element is a pair of score and decoding result,
+ *     in descending order.
+*/
+std::vector<std::pair<double, std::string>> ctc_beam_search_decoding(
+    const std::vector<std::vector<double>> &probs_seq,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    double cutoff_prob = 1.0,
+    size_t cutoff_top_n = 40,
+    Scorer *ext_scorer = nullptr,
+    size_t blank_id = 0);
+
+
+/* CTC Beam Search Decoder for batch data
+
+ * Parameters:
+ *     probs_split: 3-D vector that each element is a 2-D vector that can be
+ *                  used by ctc_beam_search_decoding().
+ *     vocabulary: A vector of vocabulary.
+ *     beam_size: The width of beam search.
+ *     num_processes: Number of threads for beam search. Must be greater
+ *                    than 0.
+ *     cutoff_prob: Cutoff probability for pruning.
+ *     cutoff_top_n: Cutoff number for pruning.
+ *     ext_scorer: External scorer to evaluate a prefix, which consists of
+ *                 n-gram language model scoring and word insertion term.
+ *                 Default null, decoding the input sample without scorer.
+ *     blank_id: Index of the CTC blank label within each probability
+ *               vector. Default 0.
+ * Return:
+ *     A 2-D vector that each element is a vector of beam search decoding
+ *     result for one audio sample.
+*/
+std::vector<std::vector<std::pair<double, std::string>>>
+ctc_beam_search_decoding_batch(
+    const std::vector<std::vector<std::vector<double>>> &probs_split,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    size_t num_processes,
+    double cutoff_prob = 1.0,
+    size_t cutoff_top_n = 40,
+    Scorer *ext_scorer = nullptr,
+    size_t blank_id = 0);
+
+/**
+ * Store the root and prefixes for decoder.
+ *
+ * The object owns `root` (allocated in the constructor, deleted in the
+ * destructor). `prefixes` holds non-owning pointers and starts out
+ * containing only the root.
+ */
+class CtcBeamSearchDecoderStorage {
+  public:
+    PathTrie *root = nullptr;
+    std::vector<PathTrie *> prefixes;
+
+    CtcBeamSearchDecoderStorage() {
+        // init prefixes' root
+        this->root = new PathTrie();
+        this->root->log_prob_b_prev = 0.0;
+        // The score of root is in log scale.Since the prob=1.0, the prob score
+        // in log scale is 0.0
+        this->root->score = root->log_prob_b_prev;
+        // std::vector<PathTrie *> prefixes;
+        this->prefixes.push_back(root);
+    }
+
+    // `root` is an owning raw pointer: a member-wise copy would make two
+    // objects delete the same trie, so copying is disabled.
+    CtcBeamSearchDecoderStorage(const CtcBeamSearchDecoderStorage &) = delete;
+    CtcBeamSearchDecoderStorage &operator=(
+        const CtcBeamSearchDecoderStorage &) = delete;
+
+    ~CtcBeamSearchDecoderStorage() {
+        if (root != nullptr) {
+            delete root;
+            root = nullptr;
+        }
+    }
+};
+
+/**
+ * The ctc beam search decoder, support batchsize >= 1
+ *
+ * Keeps one CtcBeamSearchDecoderStorage per batch entry so that decoding can
+ * proceed chunk by chunk:
+ *   - next() feeds a chunk of probabilities to the entries flagged "true"
+ *     in has_value;
+ *   - decode() returns the current result of every entry;
+ *   - reset_state() replaces the configuration and recreates all storages.
+ * The constructor and reset_state() require beam_size > 0 and
+ * num_processes > 0. ext_scorer is stored as a raw pointer and may be null.
+ */
+class CtcBeamSearchDecoderBatch {
+  public:
+    CtcBeamSearchDecoderBatch(const std::vector<std::string> &vocabulary,
+                              size_t batch_size,
+                              size_t beam_size,
+                              size_t num_processes,
+                              double cutoff_prob,
+                              size_t cutoff_top_n,
+                              Scorer *ext_scorer,
+                              size_t blank_id);
+
+    ~CtcBeamSearchDecoderBatch();
+    void next(const std::vector<std::vector<std::vector<double>>> &probs_split,
+              const std::vector<std::string> &has_value);
+
+    std::vector<std::vector<std::pair<double, std::string>>> decode();
+
+    void reset_state(size_t batch_size,
+                     size_t beam_size,
+                     size_t num_processes,
+                     double cutoff_prob,
+                     size_t cutoff_top_n);
+
+  private:
+    std::vector<std::string> vocabulary;
+    size_t batch_size;
+    size_t beam_size;
+    size_t num_processes;
+    double cutoff_prob;
+    size_t cutoff_top_n;
+    Scorer *ext_scorer;
+    size_t blank_id;
+    std::vector<std::unique_ptr<CtcBeamSearchDecoderStorage>>
+        decoder_storage_vector;
+};
+
+/**
+ * function for chunk decoding
+ *
+ * Runs the prefix beam search over the time steps in probs_seq, continuing
+ * from the trie rooted at `root` and the beam in `prefixes`. `prefixes` is
+ * updated in place after every time step. Each row of probs_seq must have
+ * vocabulary.size() entries.
+ */
+void ctc_beam_search_decode_chunk(
+    PathTrie *root,
+    std::vector<PathTrie *> &prefixes,
+    const std::vector<std::vector<double>> &probs_seq,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    double cutoff_prob,
+    size_t cutoff_top_n,
+    Scorer *ext_scorer,
+    size_t blank_id);
+/**
+ * Build the decoding result for the current beam of a chunked search.
+ * Sorts the leading min(prefixes.size(), beam_size) entries of `prefixes`
+ * and sets their approx_ctc. With a word-based scorer, the score added for
+ * an unfinished last word is subtracted again before returning.
+ */
+std::vector<std::pair<double, std::string>> get_decode_result(
+    std::vector<PathTrie *> &prefixes,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    Scorer *ext_scorer);
+
+/**
+ * free the CtcBeamSearchDecoderStorage
+ *
+ * Sets `storage` to null, which destroys the storage object it held.
+ */
+void free_storage(std::unique_ptr<CtcBeamSearchDecoderStorage> &storage);
+
+/**
+ * initialize the root
+ *
+ * When ext_scorer is non-null and not character based, a copy of the
+ * scorer's dictionary FST and a matcher over that copy are attached to
+ * `root`. Otherwise `root` is left unchanged.
+ */
+void ctc_beam_search_decode_chunk_begin(PathTrie *root, Scorer *ext_scorer);
+
+#endif  // CTC_BEAM_SEARCH_DECODER_H_
--- a/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp
@ -0,0 +1,61 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ctc_greedy_decoder.h"
+#include "decoder_utils.h"
+
+// CTC greedy (best path) decoding.
+//
+// For every time step the index with the highest score is taken, runs of the
+// same index are collapsed to one occurrence, blank_id is dropped, and the
+// remaining indices are mapped through `vocabulary` (the "<space>" token is
+// rendered as " ").
+//
+// probs_seq:  one score vector per time step; every vector must have
+//             vocabulary.size() entries (enforced with VALID_CHECK_EQ).
+// vocabulary: token strings indexed by class id.
+// blank_id:   index treated as the CTC blank; it may lie outside the
+//             vocabulary, in which case nothing is filtered out.
+// Returns the decoded string; empty when probs_seq or vocabulary is empty.
+std::string ctc_greedy_decoding(
+    const std::vector<std::vector<double>> &probs_seq,
+    const std::vector<std::string> &vocabulary,
+    size_t blank_id) {
+    // dimension check
+    const size_t num_time_steps = probs_seq.size();
+    for (size_t i = 0; i < num_time_steps; ++i) {
+        VALID_CHECK_EQ(probs_seq[i].size(),
+                       vocabulary.size(),
+                       "The shape of probs_seq does not match with "
+                       "the shape of the vocabulary");
+    }
+
+    std::string best_path_result;
+    // Nothing can be emitted without a vocabulary, and vocabulary[0] below
+    // would be out of range.
+    if (vocabulary.empty()) {
+        return best_path_result;
+    }
+
+    bool has_prev = false;
+    size_t prev_idx = 0;
+    for (const std::vector<double> &probs_step : probs_seq) {
+        // Arg-max with the first maximum winning. The search is seeded from
+        // element 0 rather than from 0.0 so that rows without any positive
+        // entry (e.g. log-probabilities) are handled as well.
+        size_t max_idx = 0;
+        for (size_t j = 1; j < probs_step.size(); ++j) {
+            if (probs_step[max_idx] < probs_step[j]) {
+                max_idx = j;
+            }
+        }
+
+        // deduplicate: a label is emitted only when it differs from the
+        // label of the previous time step
+        const bool repeated = has_prev && max_idx == prev_idx;
+        prev_idx = max_idx;
+        has_prev = true;
+        if (repeated || max_idx == blank_id) {
+            continue;
+        }
+
+        const std::string &ch = vocabulary[max_idx];
+        best_path_result += (ch == kSPACE) ? tSPACE : ch;
+    }
+    return best_path_result;
+}
--- a/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h
@ -0,0 +1,35 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CTC_GREEDY_DECODER_H
+#define CTC_GREEDY_DECODER_H
+
+#include <string>
+#include <vector>
+
+/* CTC Greedy (Best Path) Decoder
+ *
+ * Takes the most probable index at every time step, merges consecutive
+ * repeats of the same index, and drops the blank index.
+ *
+ * Parameters:
+ *     probs_seq: 2-D vector that each element is a vector of probabilities
+ *               over vocabulary of one time step. Every row must have as
+ *               many entries as `vocabulary`.
+ *     vocabulary: A vector of vocabulary.
+ *     blank_id: Index of the CTC blank; it is never emitted in the result.
+ * Return:
+ *     The decoding result in string
+ */
+std::string ctc_greedy_decoding(
+    const std::vector<std::vector<double>>& probs_seq,
+    const std::vector<std::string>& vocabulary,
+    size_t blank_id);
+
+#endif  // CTC_GREEDY_DECODER_H
--- a/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp
@ -0,0 +1,193 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "decoder_utils.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+// Prune one time step's distribution and return the survivors in log scale.
+//
+// prob_step:    probabilities over the vocabulary for one time step.
+// cutoff_prob:  when < 1.0, the most probable entries are kept until their
+//               cumulative probability reaches this value.
+// cutoff_top_n: upper bound on the number of entries kept. At least one
+//               entry is always kept for a non-empty input.
+//
+// Returns (vocabulary index, log(prob + NUM_FLT_MIN)) pairs. The pairs are
+// sorted by descending probability whenever pruning was applied, and are in
+// vocabulary order otherwise.
+std::vector<std::pair<size_t, float>> get_pruned_log_probs(
+    const std::vector<double> &prob_step,
+    double cutoff_prob,
+    size_t cutoff_top_n) {
+    std::vector<std::pair<int, double>> prob_idx;
+    prob_idx.reserve(prob_step.size());
+    for (size_t i = 0; i < prob_step.size(); ++i) {
+        prob_idx.emplace_back(static_cast<int>(i), prob_step[i]);
+    }
+    // pruning of vocabulary
+    size_t cutoff_len = prob_step.size();
+    if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) {
+        std::sort(prob_idx.begin(),
+                  prob_idx.end(),
+                  pair_comp_second_rev<int, double>);
+        if (cutoff_prob < 1.0) {
+            double cum_prob = 0.0;
+            cutoff_len = 0;
+            for (size_t i = 0; i < prob_idx.size(); ++i) {
+                cum_prob += prob_idx[i].second;
+                cutoff_len += 1;
+                if (cum_prob >= cutoff_prob || cutoff_len >= cutoff_top_n)
+                    break;
+            }
+        } else {
+            // Only the top-n limit is active here (cutoff_top_n is smaller
+            // than the vocabulary). Without this branch the entries were
+            // sorted but never truncated, so cutoff_top_n was silently
+            // ignored. One entry is kept as a minimum, matching the
+            // cumulative-probability branch above.
+            cutoff_len = std::max<size_t>(cutoff_top_n, 1);
+        }
+        prob_idx.resize(cutoff_len);
+    }
+    std::vector<std::pair<size_t, float>> log_prob_idx;
+    log_prob_idx.reserve(cutoff_len);
+    for (size_t i = 0; i < cutoff_len; ++i) {
+        // NUM_FLT_MIN keeps log() finite for zero probabilities
+        log_prob_idx.emplace_back(
+            static_cast<size_t>(prob_idx[i].first),
+            static_cast<float>(std::log(prob_idx[i].second + NUM_FLT_MIN)));
+    }
+    return log_prob_idx;
+}
+
+
+// Turn the leading prefixes of the beam into (score, text) results.
+//
+// The first min(beam_size, prefixes.size()) prefixes are taken, ordered with
+// prefix_compare, and each one is spelled out through `vocabulary` (the
+// "<space>" token becomes " "). The reported score is the negated approx_ctc
+// of the prefix.
+std::vector<std::pair<double, std::string>> get_beam_search_result(
+    const std::vector<PathTrie *> &prefixes,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size) {
+    // allow for the post processing
+    const size_t num_kept = std::min(beam_size, prefixes.size());
+    std::vector<PathTrie *> candidates(prefixes.begin(),
+                                       prefixes.begin() + num_kept);
+    std::sort(candidates.begin(), candidates.end(), prefix_compare);
+
+    std::vector<std::pair<double, std::string>> results;
+    for (PathTrie *candidate : candidates) {
+        std::vector<int> token_ids;
+        candidate->get_path_vec(token_ids);
+
+        // convert index to string
+        std::string text;
+        for (int token_id : token_ids) {
+            const std::string token = vocabulary[token_id];
+            text += (token == kSPACE) ? tSPACE : token;
+        }
+        results.emplace_back(-candidate->approx_ctc, text);
+    }
+
+    return results;
+}
+
+// Number of UTF-8 characters in `str`: every byte that is not a continuation
+// byte (bit pattern 10xxxxxx) starts a new character.
+size_t get_utf8_str_len(const std::string &str) {
+    size_t num_chars = 0;
+    for (size_t pos = 0; pos < str.size(); ++pos) {
+        const bool is_continuation = (str[pos] & 0xc0) == 0x80;
+        if (!is_continuation) {
+            ++num_chars;
+        }
+    }
+    return num_chars;
+}
+
+// Split `str` into its UTF-8 characters, one string per character. A new
+// character starts at every byte that is not a continuation byte.
+std::vector<std::string> split_utf8_str(const std::string &str) {
+    std::vector<std::string> chars;
+    std::string current;
+
+    for (size_t pos = 0; pos < str.size(); ++pos) {
+        const char byte = str[pos];
+        const bool starts_char = (byte & 0xc0) != 0x80;
+        if (starts_char && !current.empty()) {
+            chars.push_back(current);
+            current.clear();
+        }
+        current += byte;
+    }
+    // The pending character is flushed unconditionally, so an empty input
+    // yields a single empty string.
+    chars.push_back(current);
+    return chars;
+}
+
+// Split `s` on every occurrence of `delim`, dropping empty pieces (so
+// leading, trailing and repeated delimiters produce no empty strings).
+// An empty `delim` cannot split anything: the whole of `s` is returned as the
+// only piece (or nothing when `s` is empty).
+std::vector<std::string> split_str(const std::string &s,
+                                   const std::string &delim) {
+    std::vector<std::string> result;
+    // std::string::find with an empty needle matches at `start` itself, so
+    // the loop below would never advance; handle that case up front.
+    if (delim.empty()) {
+        if (!s.empty()) {
+            result.push_back(s);
+        }
+        return result;
+    }
+
+    std::size_t start = 0, delim_len = delim.size();
+    while (true) {
+        std::size_t end = s.find(delim, start);
+        if (end == std::string::npos) {
+            // trailing piece after the last delimiter
+            if (start < s.size()) {
+                result.push_back(s.substr(start));
+            }
+            break;
+        }
+        if (end > start) {
+            result.push_back(s.substr(start, end - start));
+        }
+        start = end + delim_len;
+    }
+    return result;
+}
+
+// Ordering used to rank prefixes: higher score first; prefixes with equal
+// score are ordered by ascending character id.
+bool prefix_compare(const PathTrie *x, const PathTrie *y) {
+    if (x->score != y->score) {
+        return x->score > y->score;
+    }
+    return x->character < y->character;
+}
+
+// Add one word, given as a sequence of label ids, to the dictionary FST as a
+// chain of new states hanging off the start state. The start state is created
+// on first use; the last state of the chain is marked final.
+// An empty word adds no arcs and marks nothing final.
+void add_word_to_fst(const std::vector<int> &word,
+                     fst::StdVectorFst *dictionary) {
+    if (dictionary->NumStates() == 0) {
+        fst::StdVectorFst::StateId start = dictionary->AddState();
+        assert(start == 0);
+        dictionary->SetStart(start);
+    }
+    // Without any label there is no destination state; previously an
+    // uninitialized state id was passed to SetFinal() in this case.
+    if (word.empty()) {
+        return;
+    }
+    fst::StdVectorFst::StateId src = dictionary->Start();
+    for (auto c : word) {
+        const fst::StdVectorFst::StateId dst = dictionary->AddState();
+        dictionary->AddArc(src, fst::StdArc(c, c, 0, dst));
+        src = dst;
+    }
+    // src now holds the last state that was added
+    dictionary->SetFinal(src, fst::StdArc::Weight::One());
+}
+
+// Convert `word` to label ids and add it to the dictionary FST.
+//
+// The word is split into UTF-8 characters; " " maps to SPACE_ID and every
+// other character is looked up in char_map. SPACE_ID is appended when
+// add_space is set. Returns false, without touching the dictionary, as soon
+// as a character is missing from char_map; returns true after adding.
+bool add_word_to_dictionary(
+    const std::string &word,
+    const std::unordered_map<std::string, int> &char_map,
+    bool add_space,
+    int SPACE_ID,
+    fst::StdVectorFst *dictionary) {
+    const std::vector<std::string> utf8_chars = split_utf8_str(word);
+
+    std::vector<int> label_ids;
+    for (const std::string &utf8_char : utf8_chars) {
+        if (utf8_char == " ") {
+            label_ids.push_back(SPACE_ID);
+            continue;
+        }
+        const auto entry = char_map.find(utf8_char);
+        if (entry == char_map.end()) {
+            return false;  // return without adding
+        }
+        label_ids.push_back(entry->second);
+    }
+
+    if (add_space) {
+        label_ids.push_back(SPACE_ID);
+    }
+
+    add_word_to_fst(label_ids, dictionary);
+    return true;  // return with successful adding
+}
--- a/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h
@ -0,0 +1,111 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef DECODER_UTILS_H_
+#define DECODER_UTILS_H_
+
+#include <string>
+#include <utility>
+#include "fst/log.h"
+#include "path_trie.h"
+
+// Vocabulary token that stands for a blank space, and the text it is rendered
+// as in decoded output.
+const std::string kSPACE = "<space>";
+const std::string tSPACE = " ";
+// Largest finite float (not a true infinity); its negation is used as the
+// initial value of log-probability fields.
+const float NUM_FLT_INF = std::numeric_limits<float>::max();
+// Smallest positive normalized float; added to a probability before taking
+// its log so that a zero probability stays finite.
+const float NUM_FLT_MIN = std::numeric_limits<float>::min();
+
+// Validation helper behind the VALID_CHECK* macros. When `x` is false it
+// prints "[file:line] " to stdout and logs the failed expression text `expr`
+// together with `err` at FATAL severity; when `x` is true it does nothing.
+inline void check(
+    bool x, const char *expr, const char *file, int line, const char *err) {
+    if (x) {
+        return;
+    }
+    std::cout << "[" << file << ":" << line << "] ";
+    LOG(FATAL) << "\"" << expr << "\" check failed. " << err;
+}
+
+// Validation macros. VALID_CHECK evaluates `x`, and forwards the result, the
+// stringified expression, the call site (__FILE__ / __LINE__) and the message
+// `info` to check(). The _EQ / _GT / _LT variants build the comparison
+// expression from two operands.
+#define VALID_CHECK(x, info) \
+    check(static_cast<bool>(x), #x, __FILE__, __LINE__, info)
+#define VALID_CHECK_EQ(x, y, info) VALID_CHECK((x) == (y), info)
+#define VALID_CHECK_GT(x, y, info) VALID_CHECK((x) > (y), info)
+#define VALID_CHECK_LT(x, y, info) VALID_CHECK((x) < (y), info)
+
+
+// Comparator that orders pairs by descending `first` member.
+template <typename T1, typename T2>
+bool pair_comp_first_rev(const std::pair<T1, T2> &a,
+                         const std::pair<T1, T2> &b) {
+    const T1 &lhs_key = a.first;
+    const T1 &rhs_key = b.first;
+    return lhs_key > rhs_key;
+}
+
+// Comparator that orders pairs by descending `second` member.
+template <typename T1, typename T2>
+bool pair_comp_second_rev(const std::pair<T1, T2> &a,
+                          const std::pair<T1, T2> &b) {
+    const T2 &lhs_value = a.second;
+    const T2 &rhs_value = b.second;
+    return lhs_value > rhs_value;
+}
+
+// Return log(exp(x) + exp(y)), i.e. the sum of two probabilities that are
+// given in log scale. An operand at or below -numeric_limits<T>::max() is
+// treated as log(0) and the other operand is returned unchanged. The larger
+// operand is factored out before exponentiating to keep exp() in range.
+template <typename T>
+T log_sum_exp(const T &x, const T &y) {
+    const T log_zero = -std::numeric_limits<T>::max();
+    if (x <= log_zero) {
+        return y;
+    }
+    if (y <= log_zero) {
+        return x;
+    }
+    const T pivot = std::max(x, y);
+    const T scaled_sum = std::exp(x - pivot) + std::exp(y - pivot);
+    return std::log(scaled_sum) + pivot;
+}
+
+// Get pruned probability vector for each time step's beam search.
+// Returns (vocabulary index, log probability) pairs. When cutoff_prob < 1.0
+// the most probable entries are kept until their cumulative probability
+// reaches cutoff_prob or cutoff_top_n entries have been kept.
+std::vector<std::pair<size_t, float>> get_pruned_log_probs(
+    const std::vector<double> &prob_step,
+    double cutoff_prob,
+    size_t cutoff_top_n);
+
+// Get beam search result from prefixes in trie tree.
+// Considers at most the first beam_size entries of `prefixes`, orders them
+// with prefix_compare, and returns one (score, text) pair per prefix, where
+// the score is the negated approx_ctc of the prefix and the text is spelled
+// through `vocabulary`.
+std::vector<std::pair<double, std::string>> get_beam_search_result(
+    const std::vector<PathTrie *> &prefixes,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size);
+
+// Comparison function for prefixes: higher score first; prefixes with equal
+// score are ordered by ascending character id.
+bool prefix_compare(const PathTrie *x, const PathTrie *y);
+
+/* Get the length of a UTF-8 encoded string in characters (not bytes):
+ * counts every byte that is not a continuation byte.
+ * See: http://stackoverflow.com/a/4063229
+ */
+size_t get_utf8_str_len(const std::string &str);
+
+/* Split a string into a list of strings on a given string
+ * delimiter. NB: delimiters on beginning / end of string are
+ * trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"].
+ * Empty pieces between adjacent delimiters are dropped as well.
+ */
+std::vector<std::string> split_str(const std::string &s,
+                                   const std::string &delim);
+
+/* Splits string into vector of strings representing
+ * UTF-8 characters (not same as chars). An empty input yields a vector
+ * holding one empty string.
+ */
+std::vector<std::string> split_utf8_str(const std::string &str);
+
+// Add a word, given as a sequence of label ids, to the dictionary FST. The
+// start state is created on first use and the word's last state is marked
+// final.
+void add_word_to_fst(const std::vector<int> &word,
+                     fst::StdVectorFst *dictionary);
+
+// Add a word in string to dictionary. The word is split into UTF-8
+// characters; " " maps to SPACE_ID and every other character is looked up in
+// char_map. SPACE_ID is appended when add_space is true. Returns false,
+// adding nothing, if a character is missing from char_map; true otherwise.
+bool add_word_to_dictionary(
+    const std::string &word,
+    const std::unordered_map<std::string, int> &char_map,
+    bool add_space,
+    int SPACE_ID,
+    fst::StdVectorFst *dictionary);
+#endif  // DECODER_UTILS_H_
--- a/speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp
@ -0,0 +1,164 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "path_trie.h"
+
+#include <algorithm>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "decoder_utils.h"
+
+// Construct a root node: character ROOT_ (-1), no parent, no dictionary or
+// matcher attached, and every score field set to -NUM_FLT_INF.
+PathTrie::PathTrie() {
+    log_prob_b_prev = -NUM_FLT_INF;
+    log_prob_nb_prev = -NUM_FLT_INF;
+    log_prob_b_cur = -NUM_FLT_INF;
+    log_prob_nb_cur = -NUM_FLT_INF;
+    score = -NUM_FLT_INF;
+    // approx_ctc is read when results are collected; give it a defined value
+    // (same sentinel as the other score fields) so that a node which was
+    // never scored is not read uninitialized.
+    approx_ctc = -NUM_FLT_INF;
+
+    ROOT_ = -1;
+    character = ROOT_;
+    exists_ = true;
+    parent = nullptr;
+
+    dictionary_ = nullptr;
+    dictionary_state_ = 0;
+    has_dictionary_ = false;
+
+    matcher_ = nullptr;
+}
+
+// Delete every child node; each child's destructor does the same for its own
+// children, so the whole subtree is released.
+PathTrie::~PathTrie() {
+    for (auto entry = children_.begin(); entry != children_.end(); ++entry) {
+        delete entry->second;
+    }
+}
+
+// Return the child reached by appending `new_char` to this prefix.
+//
+// An existing child is reused; if it had been marked as removed it is revived
+// with its log-probabilities reset. Otherwise a new child is allocated. When
+// a dictionary is attached, a child is only created if the matcher finds an
+// arc labelled new_char + 1 from this node's dictionary state; if not,
+// nullptr is returned, and this node's dictionary state is rewound to the
+// FST start state when it was final and `reset` is set.
+PathTrie* PathTrie::get_path_trie(int new_char, bool reset) {
+    const auto existing =
+        std::find_if(children_.begin(),
+                     children_.end(),
+                     [new_char](const std::pair<int, PathTrie*>& entry) {
+                         return entry.first == new_char;
+                     });
+    if (existing != children_.end()) {
+        PathTrie* node = existing->second;
+        if (!node->exists_) {
+            node->exists_ = true;
+            node->log_prob_b_prev = -NUM_FLT_INF;
+            node->log_prob_nb_prev = -NUM_FLT_INF;
+            node->log_prob_b_cur = -NUM_FLT_INF;
+            node->log_prob_nb_cur = -NUM_FLT_INF;
+        }
+        return node;
+    }
+
+    if (!has_dictionary_) {
+        PathTrie* new_path = new PathTrie;
+        new_path->character = new_char;
+        new_path->parent = this;
+        children_.push_back(std::make_pair(new_char, new_path));
+        return new_path;
+    }
+
+    matcher_->SetState(dictionary_state_);
+    if (!matcher_->Find(new_char + 1)) {
+        // Adding this character causes word outside dictionary
+        const bool is_final = (dictionary_->Final(dictionary_state_) !=
+                               fst::TropicalWeight::Zero());
+        if (is_final && reset) {
+            dictionary_state_ = dictionary_->Start();
+        }
+        return nullptr;
+    }
+
+    PathTrie* new_path = new PathTrie;
+    new_path->character = new_char;
+    new_path->parent = this;
+    new_path->dictionary_ = dictionary_;
+    new_path->dictionary_state_ = matcher_->Value().nextstate;
+    new_path->has_dictionary_ = true;
+    new_path->matcher_ = matcher_;
+    children_.push_back(std::make_pair(new_char, new_path));
+    return new_path;
+}
+
+// Convenience overload: collect the characters of the whole prefix, walking
+// up until the root marker ROOT_ is reached. Returns the node the walk
+// stopped at.
+PathTrie* PathTrie::get_path_vec(std::vector<int>& output) {
+    return get_path_vec(output, ROOT_);
+}
+
+// Walk from this node towards the root, appending each node's character to
+// `output`, until a node whose character is `stop` or ROOT_ is reached or
+// `output` holds `max_steps` entries. `output` is then reversed (so the
+// collected characters read root-to-leaf) and the node where the walk
+// stopped is returned; that node's character is not appended.
+PathTrie* PathTrie::get_path_vec(std::vector<int>& output,
+                                 int stop,
+                                 size_t max_steps) {
+    PathTrie* node = this;
+    while (node->character != stop && node->character != node->ROOT_ &&
+           output.size() != max_steps) {
+        output.push_back(node->character);
+        node = node->parent;
+    }
+    std::reverse(output.begin(), output.end());
+    return node;
+}
+
+// Pre-order traversal of the subtree. Every node that still exists rolls its
+// current-step log-probabilities over to the "previous" fields, resets the
+// current ones, recomputes `score` from the two previous values and is
+// appended to `output`. Removed nodes are skipped but their children are
+// still visited.
+void PathTrie::iterate_to_vec(std::vector<PathTrie*>& output) {
+    if (exists_) {
+        log_prob_b_prev = log_prob_b_cur;
+        log_prob_nb_prev = log_prob_nb_cur;
+
+        log_prob_b_cur = -NUM_FLT_INF;
+        log_prob_nb_cur = -NUM_FLT_INF;
+
+        score = log_sum_exp(log_prob_b_prev, log_prob_nb_prev);
+        output.push_back(this);
+    }
+    for (size_t idx = 0; idx < children_.size(); ++idx) {
+        children_[idx].second->iterate_to_vec(output);
+    }
+}
+
+// Mark this prefix as removed. A node that still has children only gets
+// flagged; a leaf is unlinked from its parent and deleted, and the removal
+// continues upwards through parents that were already flagged and have just
+// lost their last child.
+// The node deletes itself, so it must not be used after this call returns.
+void PathTrie::remove() {
+    exists_ = false;
+    if (!children_.empty()) {
+        return;
+    }
+
+    if (parent != nullptr) {
+        std::vector<std::pair<int, PathTrie*>>& siblings = parent->children_;
+        const auto self =
+            std::find_if(siblings.begin(),
+                         siblings.end(),
+                         [this](const std::pair<int, PathTrie*>& entry) {
+                             return entry.first == character;
+                         });
+        if (self != siblings.end()) {
+            siblings.erase(self);
+        }
+        // this node is already unlinked, so the parent's destructor will
+        // not delete it a second time
+        if (siblings.empty() && !parent->exists_) {
+            parent->remove();
+        }
+    }
+    delete this;
+}
+
+
+// Attach a dictionary FST to this node and position the node at the FST's
+// start state. The pointer is stored as-is (this function does not copy the
+// FST). Once set, get_path_trie() consults matcher_ before creating children,
+// so a matcher has to be supplied through set_matcher() as well.
+void PathTrie::set_dictionary(fst::StdVectorFst* dictionary) {
+    dictionary_ = dictionary;
+    dictionary_state_ = dictionary->Start();
+    has_dictionary_ = true;
+}
+
+using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;
+// Store the matcher used to look up dictionary arcs in get_path_trie().
+// Children created afterwards share the same matcher object.
+void PathTrie::set_matcher(std::shared_ptr<FSTMATCH> matcher) {
+    // the parameter is already a private copy; move it instead of paying for
+    // a second reference-count update
+    matcher_ = std::move(matcher);
+}
--- a/speechx/speechx/asr/decoder/ctc_decoders/path_trie.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.h
@ -0,0 +1,82 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PATH_TRIE_H
+#define PATH_TRIE_H
+
+#include <algorithm>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "fst/fstlib.h"
+
+/* Trie tree for prefix storing and manipulating, with a dictionary in
+ * finite-state transducer for spelling correction.
+ *
+ * Every node stands for the prefix spelled by the characters on the path
+ * from the root to that node. Children are heap-allocated by get_path_trie()
+ * and deleted by their parent's destructor.
+ * NOTE(review): the class has a deleting destructor but declares no copy
+ * operations, so copying a node would presumably end in a double delete --
+ * confirm nodes are only ever handled through pointers.
+ */
+class PathTrie {
+  public:
+    PathTrie();
+    ~PathTrie();
+
+    // get new prefix after appending new char; returns nullptr when a
+    // dictionary is attached and it has no arc for new_char
+    PathTrie* get_path_trie(int new_char, bool reset = true);
+
+    // get the prefix in index from root to current node
+    PathTrie* get_path_vec(std::vector<int>& output);
+
+    // get the prefix in index from some stop node to current node, limited
+    // to max_steps entries; returns the node the walk stopped at
+    PathTrie* get_path_vec(
+        std::vector<int>& output,
+        int stop,
+        size_t max_steps = std::numeric_limits<size_t>::max());
+
+    // update log probs and append all existing nodes of this subtree to
+    // output
+    void iterate_to_vec(std::vector<PathTrie*>& output);
+
+    // set dictionary for FST
+    void set_dictionary(fst::StdVectorFst* dictionary);
+
+    // set the matcher used to look up arcs in the dictionary FST
+    void set_matcher(std::shared_ptr<fst::SortedMatcher<fst::StdVectorFst>>);
+
+    // true for a node that still carries the root marker as its character
+    bool is_empty() { return ROOT_ == character; }
+
+    // remove current path from root; a leaf node deletes itself
+    void remove();
+
+    // log probabilities of this prefix at the previous and the current step.
+    // NOTE(review): "b" / "nb" presumably mean ending in blank / non-blank --
+    // confirm against the beam search code.
+    float log_prob_b_prev;
+    float log_prob_nb_prev;
+    float log_prob_b_cur;
+    float log_prob_nb_cur;
+    // log-sum-exp of the two previous-step values, set by iterate_to_vec()
+    float score;
+    // NOTE(review): presumably an approximate CTC score filled in by the
+    // decoder; its negation is reported by get_beam_search_result()
+    float approx_ctc;
+    // label id of this node (ROOT_ for the root)
+    int character;
+    PathTrie* parent;
+
+  private:
+    // marker value stored in `character` of a root node (-1)
+    int ROOT_;
+    // false once remove() was called on a node that is kept as an interior
+    // node
+    bool exists_;
+    bool has_dictionary_;
+
+    // (label id, child node) pairs; the child nodes are owned by this node
+    std::vector<std::pair<int, PathTrie*>> children_;
+
+    // pointer to dictionary of FST
+    fst::StdVectorFst* dictionary_;
+    // state of the dictionary FST that corresponds to this prefix
+    fst::StdVectorFst::StateId dictionary_state_;
+    // matcher used for finding arcs in FST
+    std::shared_ptr<fst::SortedMatcher<fst::StdVectorFst>> matcher_;
+};
+
+#endif  // PATH_TRIE_H
--- a/speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp
@ -0,0 +1,232 @@
+// Licensed under GNU Lesser General Public License v3 (LGPLv3) (LGPL-3) (the
+// "COPYING.LESSER.3");
+
+#include "scorer.h"
+
+#include <unistd.h>
+#include <iostream>
+
+#include "lm/config.hh"
+#include "lm/model.hh"
+#include "lm/state.hh"
+
+#include "decoder_utils.h"
+
+using namespace lm::ngram;
+// F_OK is the access() mode that only tests for existence. <unistd.h>
+// provides it on POSIX systems; define it only where the platform headers do
+// not (e.g. on Windows) instead of redefining it unconditionally.
+#ifndef F_OK
+#define F_OK 0
+#endif
+// Build a scorer: store the weights, put every other field into its empty
+// state, then run setup() to load the language model at lm_path and register
+// vocab_list as the character set.
+Scorer::Scorer(double alpha,
+               double beta,
+               const std::string& lm_path,
+               const std::vector<std::string>& vocab_list)
+    : alpha(alpha),
+      beta(beta),
+      dictionary(nullptr),
+      language_model_(nullptr),
+      is_character_based_(true),
+      max_order_(0),
+      dict_size_(0),
+      SPACE_ID_(-1) {
+    setup(lm_path, vocab_list);
+}
+
+// Release the language model and the dictionary FST. Both are stored as
+// void* and are cast back to their real types for deletion; deleting a null
+// pointer is a no-op, so no explicit null checks are needed.
+Scorer::~Scorer() {
+    delete static_cast<lm::base::Model*>(language_model_);
+    delete static_cast<fst::StdVectorFst*>(dictionary);
+}
+
+// One-time initialisation: load the language model, register the character
+// list, and build the dictionary FST unless the language model turned out to
+// be character based.
+void Scorer::setup(const std::string& lm_path,
+                   const std::vector<std::string>& vocab_list) {
+    load_lm(lm_path);
+    set_char_map(vocab_list);
+
+    // the FST dictionary is only needed for word based language models
+    if (is_character_based()) {
+        return;
+    }
+    fill_dictionary(true);
+}
+
+// Load the language model at lm_path (the path must exist), record its order
+// and vocabulary, and decide whether it is character based: it stops being
+// considered character based as soon as one vocabulary entry other than
+// <unk>, <s> and </s> is longer than a single UTF-8 character.
+void Scorer::load_lm(const std::string& lm_path) {
+    const char* filename = lm_path.c_str();
+    VALID_CHECK_EQ(access(filename, F_OK), 0, "Invalid language model path");
+
+    // the callback collects every vocabulary entry while the model loads
+    RetriveStrEnumerateVocab enumerate;
+    lm::ngram::Config config;
+    config.enumerate_vocab = &enumerate;
+    language_model_ = lm::ngram::LoadVirtual(filename, config);
+    max_order_ = static_cast<lm::base::Model*>(language_model_)->Order();
+    vocabulary_ = enumerate.vocabulary;
+
+    for (const std::string& entry : vocabulary_) {
+        if (!is_character_based_) {
+            break;  // already decided, nothing left to check
+        }
+        const bool is_special_token =
+            entry == UNK_TOKEN || entry == START_TOKEN || entry == END_TOKEN;
+        if (!is_special_token && get_utf8_str_len(entry) > 1) {
+            is_character_based_ = false;
+        }
+    }
+}
+
+// Score an n-gram with the language model and return the log10 conditional
+// probability of its last word given the preceding ones.
+//
+// The words are fed to the model one by one starting from the null context
+// (so no <s> is inserted). OOV_SCORE is returned as soon as a word maps to
+// word index 0, which is treated as out-of-vocabulary. An empty `words`
+// yields 0.0.
+double Scorer::get_log_cond_prob(const std::vector<std::string>& words) {
+    lm::base::Model* model = static_cast<lm::base::Model*>(language_model_);
+    // initialised so that an empty `words` no longer returns an
+    // indeterminate value
+    double cond_prob = 0.0;
+    lm::ngram::State state, tmp_state, out_state;
+    // avoid to inserting <s> in begin
+    model->NullContextWrite(&state);
+    for (size_t i = 0; i < words.size(); ++i) {
+        lm::WordIndex word_index = model->BaseVocabulary().Index(words[i]);
+        // encounter OOV
+        if (word_index == 0) {
+            return OOV_SCORE;
+        }
+        cond_prob = model->BaseScore(&state, word_index, &out_state);
+        // the state after this word becomes the context for the next one
+        tmp_state = state;
+        state = out_state;
+        out_state = tmp_state;
+    }
+    // return  log10 prob
+    return cond_prob;
+}
+
+// Log probability of a whole sentence. The words are padded in front with
+// max_order_ - 1 START_TOKENs (max_order_ of them when the sentence is
+// empty), END_TOKEN is appended, and the padded sequence is scored with
+// get_log_prob().
+double Scorer::get_sent_log_prob(const std::vector<std::string>& words) {
+    const size_t num_start_tokens =
+        words.empty() ? max_order_ : max_order_ - 1;
+
+    std::vector<std::string> sentence;
+    for (size_t remaining = num_start_tokens; remaining > 0; --remaining) {
+        sentence.push_back(START_TOKEN);
+    }
+    sentence.insert(sentence.end(), words.begin(), words.end());
+    sentence.push_back(END_TOKEN);
+    return get_log_prob(sentence);
+}
+
+// Sum of the conditional log probabilities of every window of max_order_
+// consecutive words in `words`. `words` must be longer than max_order_.
+double Scorer::get_log_prob(const std::vector<std::string>& words) {
+    assert(words.size() > max_order_);
+    const size_t num_windows = words.size() - max_order_ + 1;
+
+    double total = 0.0;
+    for (size_t offset = 0; offset < num_windows; ++offset) {
+        const auto window_begin = words.begin() + offset;
+        const std::vector<std::string> ngram(window_begin,
+                                             window_begin + max_order_);
+        total += get_log_cond_prob(ngram);
+    }
+    return total;
+}
+
+// Replace the language model weight (alpha) and the word insertion weight
+// (beta). The arguments are floats while the members are doubles, so the
+// values are widened on assignment.
+void Scorer::reset_params(float alpha, float beta) {
+    this->alpha = alpha;
+    this->beta = beta;
+}
+
+// Concatenate the character-list entries selected by the label ids in
+// `input`. The ids are used as indices into char_list_ without range checks.
+std::string Scorer::vec2str(const std::vector<int>& input) {
+    std::string word;
+    for (size_t pos = 0; pos < input.size(); ++pos) {
+        word.append(char_list_[input[pos]]);
+    }
+    return word;
+}
+
+// Turn label ids into language model tokens: the labels are spelled out with
+// vec2str() and the text is split into UTF-8 characters for a character
+// based model, or on " " for a word based one. Empty input gives an empty
+// result.
+std::vector<std::string> Scorer::split_labels(const std::vector<int>& labels) {
+    if (labels.empty()) {
+        return {};
+    }
+
+    const std::string text = vec2str(labels);
+    if (is_character_based_) {
+        return split_utf8_str(text);
+    }
+    return split_str(text, " ");
+}
+
+// Store the character list and rebuild the character -> FST label map.
+// SPACE_ID_ is set to the position of the "<space>" entry when there is one.
+void Scorer::set_char_map(const std::vector<std::string>& char_list) {
+    char_list_ = char_list;
+    char_map_.clear();
+
+    // Set the char map for the FST for spelling correction
+    size_t position = 0;
+    for (const std::string& token : char_list_) {
+        if (token == kSPACE) {
+            SPACE_ID_ = position;
+        }
+        // The initial state of FST is state 0, hence the index of chars in
+        // the FST should start from 1 to avoid the conflict with the initial
+        // state, otherwise wrong decoding results would be given.
+        char_map_[token] = position + 1;
+        ++position;
+    }
+}
+
+// Build the n-gram (at most max_order_ tokens, oldest first) that ends at
+// `prefix`, for querying the language model.
+//
+// The trie is walked from `prefix` towards the root, one token per loop
+// iteration. If the root is reached before max_order_ tokens were collected,
+// the remaining slots are filled with START_TOKEN.
+std::vector<std::string> Scorer::make_ngram(PathTrie* prefix) {
+    std::vector<std::string> ngram;
+    PathTrie* current_node = prefix;
+    PathTrie* new_node = nullptr;
+
+    for (int order = 0; order < max_order_; order++) {
+        std::vector<int> prefix_vec;
+
+        if (is_character_based_) {
+            // a token is a single character: take at most one step upwards
+            new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_, 1);
+            current_node = new_node;
+        } else {
+            // a token is a word: collect characters up to the previous space
+            // (or the root), then continue from the node above that space
+            new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_);
+            current_node = new_node->parent;  // Skipping spaces
+        }
+
+        // reconstruct word
+        std::string word = vec2str(prefix_vec);
+        ngram.push_back(word);
+
+        // character -1 is the root marker: the start of the prefix was hit
+        if (new_node->character == -1) {
+            // No more spaces, but still need order
+            for (int i = 0; i < max_order_ - order - 1; i++) {
+                ngram.push_back(START_TOKEN);
+            }
+            break;
+        }
+    }
+    // tokens were collected newest first; put them in reading order
+    std::reverse(ngram.begin(), ngram.end());
+    return ngram;
+}
+
+// Build the dictionary FST from the language model vocabulary and store it
+// in this->dictionary; dict_size_ is set to the number of words that could
+// be added. With add_space set, the space label is appended to every word.
+// NOTE(review): the previous value of this->dictionary is overwritten
+// without being deleted -- fine as long as this runs once per Scorer;
+// confirm.
+void Scorer::fill_dictionary(bool add_space) {
+    // local FST under construction (shadows the member of the same name)
+    fst::StdVectorFst dictionary;
+    // For each unigram convert to ints and put in trie
+    int dict_size = 0;
+    for (const auto& word : vocabulary_) {
+        // labels in char_map_ are shifted by one, hence SPACE_ID_ + 1
+        bool added = add_word_to_dictionary(
+            word, char_map_, add_space, SPACE_ID_ + 1, &dictionary);
+        dict_size += added ? 1 : 0;
+    }
+
+    dict_size_ = dict_size;
+
+    /* Simplify FST
+
+     * This gets rid of "epsilon" transitions in the FST.
+     * These are transitions that don't require a string input to be taken.
+     * Getting rid of them is necessary to make the FST deterministic, but
+     * can greatly increase the size of the FST
+     */
+    fst::RmEpsilon(&dictionary);
+    fst::StdVectorFst* new_dict = new fst::StdVectorFst;
+
+    /* This makes the FST deterministic, meaning for any string input there's
+     * only one possible state the FST could be in.  It is assumed our
+     * dictionary is deterministic when using it.
+     * (lest we'd have to check for multiple transitions at each state)
+     */
+    fst::Determinize(dictionary, new_dict);
+
+    /* Finds the simplest equivalent fst. This is unnecessary but decreases
+     * memory usage of the dictionary
+     */
+    fst::Minimize(new_dict);
+    // ownership passes to the Scorer, which deletes it in its destructor
+    this->dictionary = new_dict;
+}
--- a/speechx/speechx/asr/decoder/ctc_decoders/scorer.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/scorer.h
@ -0,0 +1,114 @@
+// Licensed under GNU Lesser General Public License v3 (LGPLv3) (LGPL-3) (the
+// "COPYING.LESSER.3");
+
+#ifndef SCORER_H_
+#define SCORER_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "lm/enumerate_vocab.hh"
+#include "lm/virtual_interface.hh"
+#include "lm/word_index.hh"
+
+#include "path_trie.h"
+
+// Score returned by Scorer::get_log_cond_prob() when an n-gram contains an
+// out-of-vocabulary word.
+const double OOV_SCORE = -1000.0;
+// Special tokens: sentence start, unknown word, sentence end.
+const std::string START_TOKEN = "<s>";
+const std::string UNK_TOKEN = "<unk>";
+const std::string END_TOKEN = "</s>";
+
+// Implement a callback to retrieve the dictionary of language model.
+// An instance is handed to the language model loader through
+// lm::ngram::Config::enumerate_vocab; every word reported through Add() is
+// appended to `vocabulary`.
+class RetriveStrEnumerateVocab : public lm::EnumerateVocab {
+  public:
+    RetriveStrEnumerateVocab() {}
+
+    // Record one vocabulary entry; the word index is not stored.
+    void Add(lm::WordIndex index, const StringPiece &str) {
+        vocabulary.push_back(std::string(str.data(), str.length()));
+    }
+
+    // Words in the order they were reported.
+    std::vector<std::string> vocabulary;
+};
+
+/* External scorer to query score for n-gram or sentence, including language
+ * model scoring and word insertion.
+ *
+ * Example:
+ *     Scorer scorer(alpha, beta, "path_of_language_model", vocabulary);
+ *     scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" });
+ *     scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" });
+ */
+class Scorer {
+  public:
+    // Loads the language model at lm_path and registers `vocabulary` as the
+    // list of output characters.
+    Scorer(double alpha,
+           double beta,
+           const std::string &lm_path,
+           const std::vector<std::string> &vocabulary);
+    ~Scorer();
+
+    // log10 conditional probability of the last word of `words` given the
+    // preceding ones; OOV_SCORE if a word is out of vocabulary
+    double get_log_cond_prob(const std::vector<std::string> &words);
+
+    // log probability of a whole sentence, padded with <s> and </s>
+    double get_sent_log_prob(const std::vector<std::string> &words);
+
+    // return the max order
+    size_t get_max_order() const { return max_order_; }
+
+    // return the dictionary size of language model
+    size_t get_dict_size() const { return dict_size_; }
+
+    // return true if the language model is character based
+    bool is_character_based() const { return is_character_based_; }
+
+    // reset params alpha & beta
+    void reset_params(float alpha, float beta);
+
+    // make ngram for a given prefix
+    std::vector<std::string> make_ngram(PathTrie *prefix);
+
+    // transform the labels in index to the vector of words (word based lm) or
+    // the vector of characters (character based lm)
+    std::vector<std::string> split_labels(const std::vector<int> &labels);
+
+    // language model weight
+    double alpha;
+    // word insertion weight
+    double beta;
+
+    // pointer to the dictionary of FST (an fst::StdVectorFst owned by the
+    // scorer; null until fill_dictionary() has run)
+    void *dictionary;
+
+  protected:
+    // necessary setup: load language model, set char map, fill FST's dictionary
+    void setup(const std::string &lm_path,
+               const std::vector<std::string> &vocab_list);
+
+    // load language model from given path
+    void load_lm(const std::string &lm_path);
+
+    // fill dictionary for FST
+    void fill_dictionary(bool add_space);
+
+    // set char map
+    void set_char_map(const std::vector<std::string> &char_list);
+
+    // sum of the conditional log probabilities of all max_order_-word
+    // windows of `words`
+    double get_log_prob(const std::vector<std::string> &words);
+
+    // translate the vector in index to string
+    std::string vec2str(const std::vector<int> &input);
+
+  private:
+    // the loaded language model (an lm::base::Model owned by the scorer)
+    void *language_model_;
+    bool is_character_based_;
+    size_t max_order_;
+    size_t dict_size_;
+
+    // index of "<space>" in char_list_, or -1 if it has none
+    int SPACE_ID_;
+    std::vector<std::string> char_list_;
+    // character -> FST label (index in char_list_ plus one)
+    std::unordered_map<std::string, int> char_map_;
+
+    // vocabulary of the language model
+    std::vector<std::string> vocabulary_;
+};
+
+#endif  // SCORER_H_
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.cc
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.cc
@ -84,7 +84,7 @@ void CTCPrefixBeamSearch::AdvanceDecode(

        timer.Reset();
        std::vector<std::vector<kaldi::BaseFloat>> likelihood;
-        likelihood.push_back(frame_prob);
+        likelihood.push_back(std::move(frame_prob));
        AdvanceDecoding(likelihood);
        search_cost += timer.Elapsed();

--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.h
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_score.h
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_score.h
--- a/speechx/speechx/asr/decoder/ctc_tlg_decoder.cc
+++ b/speechx/speechx/asr/decoder/ctc_tlg_decoder.cc
--- a/speechx/speechx/asr/decoder/ctc_tlg_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_tlg_decoder.h
--- a/speechx/speechx/asr/decoder/ctc_tlg_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/ctc_tlg_decoder_main.cc
--- a/speechx/speechx/asr/decoder/decoder_itf.h
+++ b/speechx/speechx/asr/decoder/decoder_itf.h
--- a/speechx/speechx/asr/decoder/nnet_logprob_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/nnet_logprob_decoder_main.cc
--- a/speechx/speechx/asr/decoder/param.h
+++ b/speechx/speechx/asr/decoder/param.h
--- a/speechx/speechx/asr/nnet/CMakeLists.txt
+++ b/speechx/speechx/asr/nnet/CMakeLists.txt
--- a/speechx/speechx/asr/nnet/decodable.cc
+++ b/speechx/speechx/asr/nnet/decodable.cc
--- a/speechx/speechx/asr/nnet/decodable.h
+++ b/speechx/speechx/asr/nnet/decodable.h
--- a/speechx/speechx/asr/nnet/ds2_nnet.cc
+++ b/speechx/speechx/asr/nnet/ds2_nnet.cc
--- a/speechx/speechx/asr/nnet/ds2_nnet.h
+++ b/speechx/speechx/asr/nnet/ds2_nnet.h
--- a/speechx/speechx/asr/nnet/ds2_nnet_main.cc
+++ b/speechx/speechx/asr/nnet/ds2_nnet_main.cc
--- a/speechx/speechx/asr/nnet/nnet_itf.h
+++ b/speechx/speechx/asr/nnet/nnet_itf.h
--- a/speechx/speechx/asr/nnet/u2_nnet.cc
+++ b/speechx/speechx/asr/nnet/u2_nnet.cc
--- a/speechx/speechx/asr/nnet/u2_nnet.h
+++ b/speechx/speechx/asr/nnet/u2_nnet.h
--- a/speechx/speechx/asr/nnet/u2_nnet_main.cc
+++ b/speechx/speechx/asr/nnet/u2_nnet_main.cc
--- a/speechx/speechx/asr/recognizer/CMakeLists.txt
+++ b/speechx/speechx/asr/recognizer/CMakeLists.txt
--- a/speechx/speechx/asr/recognizer/recognizer.cc
+++ b/speechx/speechx/asr/recognizer/recognizer.cc
--- a/speechx/speechx/asr/recognizer/recognizer.h
+++ b/speechx/speechx/asr/recognizer/recognizer.h
--- a/speechx/speechx/asr/recognizer/recognizer_main.cc
+++ b/speechx/speechx/asr/recognizer/recognizer_main.cc
--- a/speechx/speechx/asr/recognizer/u2_recognizer.cc
+++ b/speechx/speechx/asr/recognizer/u2_recognizer.cc
--- a/speechx/speechx/asr/recognizer/u2_recognizer.h
+++ b/speechx/speechx/asr/recognizer/u2_recognizer.h
--- a/speechx/speechx/asr/recognizer/u2_recognizer_main.cc
+++ b/speechx/speechx/asr/recognizer/u2_recognizer_main.cc
--- a/speechx/speechx/asr/server/CMakeLists.txt
+++ b/speechx/speechx/asr/server/CMakeLists.txt
--- a/speechx/speechx/asr/server/websocket/CMakeLists.txt
+++ b/speechx/speechx/asr/server/websocket/CMakeLists.txt
--- a/speechx/speechx/asr/server/websocket/websocket_client.cc
+++ b/speechx/speechx/asr/server/websocket/websocket_client.cc
--- a/speechx/speechx/asr/server/websocket/websocket_client.h
+++ b/speechx/speechx/asr/server/websocket/websocket_client.h
--- a/speechx/speechx/asr/server/websocket/websocket_client_main.cc
+++ b/speechx/speechx/asr/server/websocket/websocket_client_main.cc
--- a/speechx/speechx/asr/server/websocket/websocket_server.cc
+++ b/speechx/speechx/asr/server/websocket/websocket_server.cc
--- a/speechx/speechx/asr/server/websocket/websocket_server.h
+++ b/speechx/speechx/asr/server/websocket/websocket_server.h
--- a/speechx/speechx/asr/server/websocket/websocket_server_main.cc
+++ b/speechx/speechx/asr/server/websocket/websocket_server_main.cc
--- a/speechx/speechx/common/CMakeLists.txt
+++ b/speechx/speechx/common/CMakeLists.txt
@ -0,0 +1,16 @@
+include_directories(
+${CMAKE_CURRENT_SOURCE_DIR}
+${CMAKE_CURRENT_SOURCE_DIR}/base
+)
+
+include_directories(
+${CMAKE_CURRENT_SOURCE_DIR}/../
+${CMAKE_CURRENT_SOURCE_DIR}/utils
+)
+add_subdirectory(utils)
+
+include_directories(
+${CMAKE_CURRENT_SOURCE_DIR}
+${CMAKE_CURRENT_SOURCE_DIR}/frontend
+)
+add_subdirectory(frontend)
--- a/speechx/speechx/common/base/basic_types.h
+++ b/speechx/speechx/common/base/basic_types.h
--- a/speechx/speechx/common/base/common.h
+++ b/speechx/speechx/common/base/common.h
--- a/speechx/speechx/common/base/flags.h
+++ b/speechx/speechx/common/base/flags.h
--- a/speechx/speechx/common/base/log.h
+++ b/speechx/speechx/common/base/log.h
--- a/speechx/speechx/common/base/macros.h
+++ b/speechx/speechx/common/base/macros.h
--- a/speechx/speechx/common/base/thread_pool.h
+++ b/speechx/speechx/common/base/thread_pool.h
--- a/speechx/speechx/common/frontend/CMakeLists.txt
+++ b/speechx/speechx/common/frontend/CMakeLists.txt
--- a/speechx/speechx/common/frontend/audio/CMakeLists.txt
+++ b/speechx/speechx/common/frontend/audio/CMakeLists.txt
--- a/speechx/speechx/common/frontend/audio/assembler.cc
+++ b/speechx/speechx/common/frontend/audio/assembler.cc
--- a/speechx/speechx/common/frontend/audio/assembler.h
+++ b/speechx/speechx/common/frontend/audio/assembler.h
--- a/speechx/speechx/common/frontend/audio/audio_cache.cc
+++ b/speechx/speechx/common/frontend/audio/audio_cache.cc
--- a/speechx/speechx/common/frontend/audio/audio_cache.h
+++ b/speechx/speechx/common/frontend/audio/audio_cache.h
--- a/speechx/speechx/common/frontend/audio/cmvn.cc
+++ b/speechx/speechx/common/frontend/audio/cmvn.cc
--- a/speechx/speechx/common/frontend/audio/cmvn.h
+++ b/speechx/speechx/common/frontend/audio/cmvn.h
--- a/speechx/speechx/common/frontend/audio/cmvn_json2kaldi_main.cc
+++ b/speechx/speechx/common/frontend/audio/cmvn_json2kaldi_main.cc
--- a/speechx/speechx/common/frontend/audio/compute_fbank_main.cc
+++ b/speechx/speechx/common/frontend/audio/compute_fbank_main.cc
--- a/speechx/speechx/common/frontend/audio/compute_linear_spectrogram_main.cc
+++ b/speechx/speechx/common/frontend/audio/compute_linear_spectrogram_main.cc
--- a/speechx/speechx/common/frontend/audio/data_cache.h
+++ b/speechx/speechx/common/frontend/audio/data_cache.h
--- a/speechx/speechx/common/frontend/audio/db_norm.cc
+++ b/speechx/speechx/common/frontend/audio/db_norm.cc
--- a/speechx/speechx/common/frontend/audio/db_norm.h
+++ b/speechx/speechx/common/frontend/audio/db_norm.h
--- a/speechx/speechx/common/frontend/audio/fbank.cc
+++ b/speechx/speechx/common/frontend/audio/fbank.cc
--- a/speechx/speechx/common/frontend/audio/fbank.h
+++ b/speechx/speechx/common/frontend/audio/fbank.h
--- a/speechx/speechx/common/frontend/audio/feature_cache.cc
+++ b/speechx/speechx/common/frontend/audio/feature_cache.cc
--- a/speechx/speechx/common/frontend/audio/feature_cache.h
+++ b/speechx/speechx/common/frontend/audio/feature_cache.h
--- a/speechx/speechx/common/frontend/audio/feature_common.h
+++ b/speechx/speechx/common/frontend/audio/feature_common.h
--- a/speechx/speechx/common/frontend/audio/feature_common_inl.h
+++ b/speechx/speechx/common/frontend/audio/feature_common_inl.h
--- a/speechx/speechx/common/frontend/audio/feature_pipeline.cc
+++ b/speechx/speechx/common/frontend/audio/feature_pipeline.cc
--- a/speechx/speechx/common/frontend/audio/feature_pipeline.h
+++ b/speechx/speechx/common/frontend/audio/feature_pipeline.h
--- a/speechx/speechx/common/frontend/audio/frontend_itf.h
+++ b/speechx/speechx/common/frontend/audio/frontend_itf.h
--- a/speechx/speechx/common/frontend/audio/linear_spectrogram.cc
+++ b/speechx/speechx/common/frontend/audio/linear_spectrogram.cc
--- a/speechx/speechx/common/frontend/audio/linear_spectrogram.h
+++ b/speechx/speechx/common/frontend/audio/linear_spectrogram.h
--- a/speechx/speechx/common/frontend/audio/mfcc.cc
+++ b/speechx/speechx/common/frontend/audio/mfcc.cc
--- a/speechx/speechx/common/frontend/audio/mfcc.h
+++ b/speechx/speechx/common/frontend/audio/mfcc.h
--- a/speechx/speechx/common/frontend/audio/normalizer.h
+++ b/speechx/speechx/common/frontend/audio/normalizer.h
--- a/speechx/speechx/common/utils/CMakeLists.txt
+++ b/speechx/speechx/common/utils/CMakeLists.txt
--- a/speechx/speechx/common/utils/file_utils.cc
+++ b/speechx/speechx/common/utils/file_utils.cc
--- a/speechx/speechx/common/utils/file_utils.h
+++ b/speechx/speechx/common/utils/file_utils.h
--- a/speechx/speechx/common/utils/math.cc
+++ b/speechx/speechx/common/utils/math.cc
--- a/speechx/speechx/common/utils/math.h
+++ b/speechx/speechx/common/utils/math.h
--- a/speechx/speechx/decoder/ctc_decoders
+++ b/speechx/speechx/decoder/ctc_decoders
@ -1 +0,0 @@
-../../../third_party/ctc_decoders
--- a/speechx/speechx/frontend/text/CMakeLists.txt
+++ b/speechx/speechx/frontend/text/CMakeLists.txt
--- a/speechx/speechx/kaldi/CMakeLists.txt
+++ b/speechx/speechx/kaldi/CMakeLists.txt
@ -1,4 +1,7 @@
 project(kaldi)
+include_directories(
+${CMAKE_CURRENT_SOURCE_DIR}
+)

 add_subdirectory(base)
 add_subdirectory(util)
@ -10,4 +13,4 @@ add_subdirectory(decoder)
 add_subdirectory(lm)

 add_subdirectory(fstbin)
-add_subdirectory(lmbin)
+add_subdirectory(lmbin)
--- a/speechx/speechx/third_party/CMakeLists.txt
+++ b/speechx/speechx/third_party/CMakeLists.txt
--- a/speechx/speechx/third_party/README.md
+++ b/speechx/speechx/third_party/README.md
@ -1,4 +0,0 @@
-# third party
-
-Those libs copied and developed from third pary opensource software projects.
-For all of these things, the official websites are the best place to go.