[speechx]Speechx directory refactor (#2746)

* refactor directory
3 years ago · 869f4267d5
parent 7804c8e8f3
commit 869f4267d5
100 changed files with 1821 additions and 53 deletions
--- a/speechx/requirement.txt
+++ b/speechx/requirement.txt
@ -1 +0,0 @@
 paddlepaddle>=2.4rc
--- a/speechx/speechx/CMakeLists.txt
+++ b/speechx/speechx/CMakeLists.txt
@ -2,50 +2,11 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
 project(speechx LANGUAGES CXX)
-include_directories(
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-${CMAKE_CURRENT_SOURCE_DIR}
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kaldi)
-${CMAKE_CURRENT_SOURCE_DIR}/kaldi
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common)
 )
 add_subdirectory(kaldi)
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}
 ${CMAKE_CURRENT_SOURCE_DIR}/utils
 )
 add_subdirectory(utils)
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}
 ${CMAKE_CURRENT_SOURCE_DIR}/frontend
 )
 add_subdirectory(frontend)
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}
 ${CMAKE_CURRENT_SOURCE_DIR}/nnet
 )
 add_subdirectory(nnet)
-include_directories(
+add_subdirectory(asr)
-${CMAKE_CURRENT_SOURCE_DIR}
+add_subdirectory(common)
-${CMAKE_CURRENT_SOURCE_DIR}/decoder
+add_subdirectory(kaldi)
 )
 add_subdirectory(decoder)
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}
 ${CMAKE_CURRENT_SOURCE_DIR}/recognizer
 )
 add_subdirectory(recognizer)
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}
 ${CMAKE_CURRENT_SOURCE_DIR}/protocol
 )
 add_subdirectory(protocol)
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}
 ${CMAKE_CURRENT_SOURCE_DIR}/codelab
 )
 add_subdirectory(codelab)
--- a/speechx/speechx/asr/CMakeLists.txt
+++ b/speechx/speechx/asr/CMakeLists.txt
@ -0,0 +1,11 @@
 # Build entry point for the ASR part of speechx.
 cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
 project(ASR LANGUAGES CXX)
 # Directory-wide header search paths: this directory and its server/ folder.
 include_directories(
     ${CMAKE_CURRENT_SOURCE_DIR}
     ${CMAKE_CURRENT_SOURCE_DIR}/server
 )
 # Descend into each component, in the original order.
 foreach(asr_component decoder recognizer nnet server)
     add_subdirectory(${asr_component})
 endforeach()
--- a/speechx/speechx/asr/decoder/CMakeLists.txt
+++ b/speechx/speechx/asr/decoder/CMakeLists.txt
--- a/speechx/speechx/asr/decoder/common.h
+++ b/speechx/speechx/asr/decoder/common.h
--- a/speechx/speechx/asr/decoder/ctc_beam_search_decoder.cc
+++ b/speechx/speechx/asr/decoder/ctc_beam_search_decoder.cc
--- a/speechx/speechx/asr/decoder/ctc_beam_search_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_beam_search_decoder.h
--- a/speechx/speechx/asr/decoder/ctc_beam_search_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/ctc_beam_search_decoder_main.cc
--- a/speechx/speechx/asr/decoder/ctc_beam_search_opt.h
+++ b/speechx/speechx/asr/decoder/ctc_beam_search_opt.h
--- a/speechx/speechx/asr/decoder/ctc_decoders/.gitignore
+++ b/speechx/speechx/asr/decoder/ctc_decoders/.gitignore
@ -0,0 +1,9 @@
 ThreadPool/
 build/
 dist/
 kenlm/
 openfst-1.6.3/
 openfst-1.6.3.tar.gz
 swig_decoders.egg-info/
 decoders_wrap.cxx
 swig_decoders.py
--- a/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp
@ -0,0 +1,607 @@
 // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "ctc_beam_search_decoder.h"
 #include <algorithm>
 #include <cmath>
 #include <iostream>
 #include <limits>
 #include <map>
 #include <utility>
 #include "ThreadPool.h"
 #include "fst/fstlib.h"
 #include "decoder_utils.h"
 #include "path_trie.h"
 using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;
 std::vector<std::pair<double, std::string>> ctc_beam_search_decoding(
    const std::vector<std::vector<double>> &probs_seq,
    const std::vector<std::string> &vocabulary,
    size_t beam_size,
    double cutoff_prob,
    size_t cutoff_top_n,
    Scorer *ext_scorer,
    size_t blank_id) {
    // dimension check
    size_t num_time_steps = probs_seq.size();
    for (size_t i = 0; i < num_time_steps; ++i) {
        VALID_CHECK_EQ(probs_seq[i].size(),
                       // vocabulary.size() + 1,
                       vocabulary.size(),
                       "The shape of probs_seq does not match with "
                       "the shape of the vocabulary");
    }
    // assign space id
    auto it = std::find(vocabulary.begin(), vocabulary.end(), kSPACE);
    int space_id = it - vocabulary.begin();
    // if no space in vocabulary
    if ((size_t)space_id >= vocabulary.size()) {
        space_id = -2;
    }
    // init prefixes' root
    PathTrie root;
    root.score = root.log_prob_b_prev = 0.0;
    std::vector<PathTrie *> prefixes;
    prefixes.push_back(&root);
    if (ext_scorer != nullptr && !ext_scorer->is_character_based()) {
        auto fst_dict =
            static_cast<fst::StdVectorFst *>(ext_scorer->dictionary);
        fst::StdVectorFst *dict_ptr = fst_dict->Copy(true);
        root.set_dictionary(dict_ptr);
        auto matcher = std::make_shared<FSTMATCH>(*dict_ptr, fst::MATCH_INPUT);
        root.set_matcher(matcher);
    }
    // prefix search over time
    for (size_t time_step = 0; time_step < num_time_steps; ++time_step) {
        auto &prob = probs_seq[time_step];
        float min_cutoff = -NUM_FLT_INF;
        bool full_beam = false;
        if (ext_scorer != nullptr) {
            size_t num_prefixes = std::min(prefixes.size(), beam_size);
            std::sort(prefixes.begin(),
                      prefixes.begin() + num_prefixes,
                      prefix_compare);
            min_cutoff = prefixes[num_prefixes - 1]->score +
                         std::log(prob[blank_id]) -
                         std::max(0.0, ext_scorer->beta);
            full_beam = (num_prefixes == beam_size);
        }
        std::vector<std::pair<size_t, float>> log_prob_idx =
            get_pruned_log_probs(prob, cutoff_prob, cutoff_top_n);
        // loop over chars
        for (size_t index = 0; index < log_prob_idx.size(); index++) {
            auto c = log_prob_idx[index].first;
            auto log_prob_c = log_prob_idx[index].second;
            for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) {
                auto prefix = prefixes[i];
                if (full_beam && log_prob_c + prefix->score < min_cutoff) {
                    break;
                }
                // blank
                if (c == blank_id) {
                    prefix->log_prob_b_cur = log_sum_exp(
                        prefix->log_prob_b_cur, log_prob_c + prefix->score);
                    continue;
                }
                // repeated character
                if (c == prefix->character) {
                    prefix->log_prob_nb_cur =
                        log_sum_exp(prefix->log_prob_nb_cur,
                                    log_prob_c + prefix->log_prob_nb_prev);
                }
                // get new prefix
                auto prefix_new = prefix->get_path_trie(c);
                if (prefix_new != nullptr) {
                    float log_p = -NUM_FLT_INF;
                    if (c == prefix->character &&
                        prefix->log_prob_b_prev > -NUM_FLT_INF) {
                        log_p = log_prob_c + prefix->log_prob_b_prev;
                    } else if (c != prefix->character) {
                        log_p = log_prob_c + prefix->score;
                    }
                    // language model scoring
                    if (ext_scorer != nullptr &&
                        (c == space_id || ext_scorer->is_character_based())) {
                        PathTrie *prefix_to_score = nullptr;
                        // skip scoring the space
                        if (ext_scorer->is_character_based()) {
                            prefix_to_score = prefix_new;
                        } else {
                            prefix_to_score = prefix;
                        }
                        float score = 0.0;
                        std::vector<std::string> ngram;
                        ngram = ext_scorer->make_ngram(prefix_to_score);
                        score = ext_scorer->get_log_cond_prob(ngram) *
                                ext_scorer->alpha;
                        log_p += score;
                        log_p += ext_scorer->beta;
                    }
                    prefix_new->log_prob_nb_cur =
                        log_sum_exp(prefix_new->log_prob_nb_cur, log_p);
                }
            }  // end of loop over prefix
        }      // end of loop over vocabulary
        prefixes.clear();
        // update log probs
        root.iterate_to_vec(prefixes);
        // only preserve top beam_size prefixes
        if (prefixes.size() >= beam_size) {
            std::nth_element(prefixes.begin(),
                             prefixes.begin() + beam_size,
                             prefixes.end(),
                             prefix_compare);
            for (size_t i = beam_size; i < prefixes.size(); ++i) {
                prefixes[i]->remove();
            }
        }
    }  // end of loop over time
    // score the last word of each prefix that doesn't end with space
    if (ext_scorer != nullptr && !ext_scorer->is_character_based()) {
        for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
            auto prefix = prefixes[i];
            if (!prefix->is_empty() && prefix->character != space_id) {
                float score = 0.0;
                std::vector<std::string> ngram = ext_scorer->make_ngram(prefix);
                score =
                    ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha;
                score += ext_scorer->beta;
                prefix->score += score;
            }
        }
    }
    size_t num_prefixes = std::min(prefixes.size(), beam_size);
    std::sort(
        prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare);
    // compute approximate ctc score as the return score, without affecting the
    // return order of decoding result. To delete when decoder gets stable.
    for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
        double approx_ctc = prefixes[i]->score;
        if (ext_scorer != nullptr) {
            std::vector<int> output;
            prefixes[i]->get_path_vec(output);
            auto prefix_length = output.size();
            auto words = ext_scorer->split_labels(output);
            // remove word insert
            approx_ctc = approx_ctc - prefix_length * ext_scorer->beta;
            // remove language model weight:
            approx_ctc -=
                (ext_scorer->get_sent_log_prob(words)) * ext_scorer->alpha;
        }
        prefixes[i]->approx_ctc = approx_ctc;
    }
    return get_beam_search_result(prefixes, vocabulary, beam_size);
 }
 std::vector<std::vector<std::pair<double, std::string>>>
 ctc_beam_search_decoding_batch(
    const std::vector<std::vector<std::vector<double>>> &probs_split,
    const std::vector<std::string> &vocabulary,
    size_t beam_size,
    size_t num_processes,
    double cutoff_prob,
    size_t cutoff_top_n,
    Scorer *ext_scorer,
    size_t blank_id) {
    VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!");
    // thread pool
    ThreadPool pool(num_processes);
    // number of samples
    size_t batch_size = probs_split.size();
    // enqueue the tasks of decoding
    std::vector<std::future<std::vector<std::pair<double, std::string>>>> res;
    for (size_t i = 0; i < batch_size; ++i) {
        res.emplace_back(pool.enqueue(ctc_beam_search_decoding,
                                      probs_split[i],
                                      vocabulary,
                                      beam_size,
                                      cutoff_prob,
                                      cutoff_top_n,
                                      ext_scorer,
                                      blank_id));
    }
    // get decoding results
    std::vector<std::vector<std::pair<double, std::string>>> batch_results;
    for (size_t i = 0; i < batch_size; ++i) {
        batch_results.emplace_back(res[i].get());
    }
    return batch_results;
 }
 /**
 * Prepare a trie root for chunked decoding: when a scorer is given and it is
 * not character based, attach a copy of its dictionary FST and a matcher.
 */
 void ctc_beam_search_decode_chunk_begin(PathTrie *root, Scorer *ext_scorer) {
    // nothing to attach without a scorer or with a character based one
    if (ext_scorer == nullptr || ext_scorer->is_character_based()) {
        return;
    }
    auto scorer_dict = static_cast<fst::StdVectorFst *>(ext_scorer->dictionary);
    fst::StdVectorFst *dict_copy = scorer_dict->Copy(true);
    root->set_dictionary(dict_copy);
    auto dict_matcher =
        std::make_shared<FSTMATCH>(*dict_copy, fst::MATCH_INPUT);
    root->set_matcher(dict_matcher);
 }
 /**
 * Advance a chunked CTC prefix beam search by one chunk of time steps.
 *
 * Parameters:
 *     root: root of the prefix trie, kept by the caller between chunks.
 *     prefixes: current beam; read on entry and replaced with the surviving
 *               prefixes after every time step.
 *     probs_seq: one probability vector per time step of this chunk; every
 *                vector must have exactly vocabulary.size() entries.
 *     vocabulary: the output symbols.
 *     beam_size: number of prefixes kept after every time step; must be
 *                greater than 0.
 *     cutoff_prob, cutoff_top_n: pruning limits forwarded to
 *                get_pruned_log_probs().
 *     ext_scorer: optional external scorer, may be nullptr.
 *     blank_id: index of the CTC blank symbol in each probability vector.
 */
 void ctc_beam_search_decode_chunk(
    PathTrie *root,
    std::vector<PathTrie *> &prefixes,
    const std::vector<std::vector<double>> &probs_seq,
    const std::vector<std::string> &vocabulary,
    size_t beam_size,
    double cutoff_prob,
    size_t cutoff_top_n,
    Scorer *ext_scorer,
    size_t blank_id) {
    // A zero-width beam would make `prefixes[num_prefixes - 1]` below index
    // with an underflowed size_t, so reject it up front.
    VALID_CHECK_GT(beam_size, 0, "beam_size must be greater than 0!");
    // dimension check
    size_t num_time_steps = probs_seq.size();
    for (size_t i = 0; i < num_time_steps; ++i) {
        VALID_CHECK_EQ(probs_seq[i].size(),
                       // vocabulary.size() + 1,
                       vocabulary.size(),
                       "The shape of probs_seq does not match with "
                       "the shape of the vocabulary");
    }
    // assign space id
    auto it = std::find(vocabulary.begin(), vocabulary.end(), kSPACE);
    int space_id = it - vocabulary.begin();
    // if no space in vocabulary, use a sentinel that matches no symbol index
    if ((size_t)space_id >= vocabulary.size()) {
        space_id = -2;
    }
    // the root and the beam are supplied by the caller, nothing to init here
    //
    // prefix search over time
    for (size_t time_step = 0; time_step < num_time_steps; ++time_step) {
        auto &prob = probs_seq[time_step];
        float min_cutoff = -NUM_FLT_INF;
        bool full_beam = false;
        if (ext_scorer != nullptr) {
            // threshold below which extensions are skipped once the beam is
            // full: score of the last in-beam prefix, plus the blank
            // log-probability, minus max(0, beta)
            size_t num_prefixes = std::min(prefixes.size(), beam_size);
            std::sort(prefixes.begin(),
                      prefixes.begin() + num_prefixes,
                      prefix_compare);
            min_cutoff = prefixes[num_prefixes - 1]->score +
                         std::log(prob[blank_id]) -
                         std::max(0.0, ext_scorer->beta);
            full_beam = (num_prefixes == beam_size);
        }
        std::vector<std::pair<size_t, float>> log_prob_idx =
            get_pruned_log_probs(prob, cutoff_prob, cutoff_top_n);
        // loop over chars
        for (size_t index = 0; index < log_prob_idx.size(); index++) {
            auto c = log_prob_idx[index].first;
            auto log_prob_c = log_prob_idx[index].second;
            for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) {
                auto prefix = prefixes[i];
                // stops scanning further prefixes for this symbol
                if (full_beam && log_prob_c + prefix->score < min_cutoff) {
                    break;
                }
                // blank: the prefix is unchanged, accumulate its blank mass
                if (c == blank_id) {
                    prefix->log_prob_b_cur = log_sum_exp(
                        prefix->log_prob_b_cur, log_prob_c + prefix->score);
                    continue;
                }
                // repeated character: also extends the same prefix
                if (c == prefix->character) {
                    prefix->log_prob_nb_cur =
                        log_sum_exp(prefix->log_prob_nb_cur,
                                    log_prob_c + prefix->log_prob_nb_prev);
                }
                // get new prefix
                auto prefix_new = prefix->get_path_trie(c);
                if (prefix_new != nullptr) {
                    float log_p = -NUM_FLT_INF;
                    // a repeated symbol only forms a new prefix when the
                    // previous step ended in blank
                    if (c == prefix->character &&
                        prefix->log_prob_b_prev > -NUM_FLT_INF) {
                        log_p = log_prob_c + prefix->log_prob_b_prev;
                    } else if (c != prefix->character) {
                        log_p = log_prob_c + prefix->score;
                    }
                    // language model scoring
                    if (ext_scorer != nullptr &&
                        (c == space_id || ext_scorer->is_character_based())) {
                        PathTrie *prefix_to_score = nullptr;
                        // skip scoring the space
                        if (ext_scorer->is_character_based()) {
                            prefix_to_score = prefix_new;
                        } else {
                            prefix_to_score = prefix;
                        }
                        float score = 0.0;
                        std::vector<std::string> ngram;
                        ngram = ext_scorer->make_ngram(prefix_to_score);
                        score = ext_scorer->get_log_cond_prob(ngram) *
                                ext_scorer->alpha;
                        log_p += score;
                        log_p += ext_scorer->beta;
                    }
                    prefix_new->log_prob_nb_cur =
                        log_sum_exp(prefix_new->log_prob_nb_cur, log_p);
                }
            }  // end of loop over prefix
        }      // end of loop over vocabulary
        prefixes.clear();
        // update log probs
        root->iterate_to_vec(prefixes);
        // only preserve top beam_size prefixes
        if (prefixes.size() >= beam_size) {
            std::nth_element(prefixes.begin(),
                             prefixes.begin() + beam_size,
                             prefixes.end(),
                             prefix_compare);
            for (size_t i = beam_size; i < prefixes.size(); ++i) {
                prefixes[i]->remove();
            }
        }
    }  // end of loop over time
    return;
 }
 /**
 * Produce the decoding result for the current beam of a chunked search.
 *
 * With a scorer that is not character based, the unfinished last word of
 * each in-beam prefix is scored before the result is built and that score is
 * subtracted again afterwards, so decoding can continue with later chunks.
 */
 std::vector<std::pair<double, std::string>> get_decode_result(
    std::vector<PathTrie *> &prefixes,
    const std::vector<std::string> &vocabulary,
    size_t beam_size,
    Scorer *ext_scorer) {
    // locate the space symbol; -2 stands for "no space in vocabulary"
    auto space_pos = std::find(vocabulary.begin(), vocabulary.end(), kSPACE);
    int space_id = space_pos - vocabulary.begin();
    if ((size_t)space_id >= vocabulary.size()) {
        space_id = -2;
    }
    // weighted language model score plus beta for the n-gram ending at `node`
    auto last_word_score = [ext_scorer](PathTrie *node) -> float {
        float lm_score = 0.0;
        std::vector<std::string> ngram = ext_scorer->make_ngram(node);
        lm_score = ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha;
        lm_score += ext_scorer->beta;
        return lm_score;
    };
    // number of prefixes that are inside the beam
    const size_t in_beam = std::min(prefixes.size(), beam_size);
    // score the last word of each prefix that doesn't end with space
    if (ext_scorer != nullptr && !ext_scorer->is_character_based()) {
        for (size_t k = 0; k < in_beam; ++k) {
            PathTrie *node = prefixes[k];
            if (node->is_empty() || node->character == space_id) {
                continue;
            }
            node->score += last_word_score(node);
        }
    }
    std::sort(prefixes.begin(), prefixes.begin() + in_beam, prefix_compare);
    // compute approximate ctc score as the return score, without affecting the
    // return order of decoding result. To delete when decoder gets stable.
    for (size_t k = 0; k < in_beam; ++k) {
        double approx_ctc = prefixes[k]->score;
        if (ext_scorer != nullptr) {
            std::vector<int> labels;
            prefixes[k]->get_path_vec(labels);
            auto label_count = labels.size();
            auto words = ext_scorer->split_labels(labels);
            // remove word insert
            approx_ctc = approx_ctc - label_count * ext_scorer->beta;
            // remove language model weight:
            approx_ctc -=
                (ext_scorer->get_sent_log_prob(words)) * ext_scorer->alpha;
        }
        prefixes[k]->approx_ctc = approx_ctc;
    }
    std::vector<std::pair<double, std::string>> decoded =
        get_beam_search_result(prefixes, vocabulary, beam_size);
    // pay back the last word of each prefix that doesn't end with space (for
    // decoding by chunk)
    if (ext_scorer != nullptr && !ext_scorer->is_character_based()) {
        for (size_t k = 0; k < in_beam; ++k) {
            PathTrie *node = prefixes[k];
            if (node->is_empty() || node->character == space_id) {
                continue;
            }
            node->score -= last_word_score(node);
        }
    }
    return decoded;
 }
 void free_storage(std::unique_ptr<CtcBeamSearchDecoderStorage> &storage) {
    storage = nullptr;
 }
 // Nothing to release by hand: the members clean up after themselves.
 CtcBeamSearchDecoderBatch::~CtcBeamSearchDecoderBatch() = default;
 /**
 * Store the decoding configuration and create one storage object (trie root
 * plus beam) per sample of the batch.
 */
 CtcBeamSearchDecoderBatch::CtcBeamSearchDecoderBatch(
    const std::vector<std::string> &vocabulary,
    size_t batch_size,
    size_t beam_size,
    size_t num_processes,
    double cutoff_prob,
    size_t cutoff_top_n,
    Scorer *ext_scorer,
    size_t blank_id)
    : vocabulary(vocabulary),
      batch_size(batch_size),
      beam_size(beam_size),
      num_processes(num_processes),
      cutoff_prob(cutoff_prob),
      cutoff_top_n(cutoff_top_n),
      ext_scorer(ext_scorer),
      blank_id(blank_id) {
    VALID_CHECK_GT(this->beam_size, 0, "beam_size must be greater than 0!");
    VALID_CHECK_GT(
        this->num_processes, 0, "num_processes must be nonnegative!");
    for (size_t slot = 0; slot < batch_size; ++slot) {
        std::unique_ptr<CtcBeamSearchDecoderStorage> fresh_storage(
            new CtcBeamSearchDecoderStorage());
        this->decoder_storage_vector.push_back(std::move(fresh_storage));
        // the storage just appended is the one for this slot
        ctc_beam_search_decode_chunk_begin(
            this->decoder_storage_vector.back()->root, ext_scorer);
    }
 }
 /**
 * Feed the next chunk of probabilities to the per-sample decoders.
 *
 * Input
 * probs_split: shape [B, T, D]; B must equal the batch size of this decoder.
 * has_value: one flag per sample; a sample is decoded only when its entry is
 *            the string "true". Entries 0..B-1 are read, so the vector is
 *            assumed to hold at least B entries.
 */
 void CtcBeamSearchDecoderBatch::next(
    const std::vector<std::vector<std::vector<double>>> &probs_split,
    const std::vector<std::string> &has_value) {
    VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!");
    // number of samples
    size_t probs_num = probs_split.size();
    VALID_CHECK_EQ(this->batch_size,
                   probs_num,
                   "The batch size of the current input data should be same "
                   "with the input data before");
    // count the samples that actually carry data
    size_t num_has_value = 0;
    for (size_t i = 0; i < has_value.size(); i++) {
        if (has_value[i] == "true") num_has_value += 1;
    }
    // nothing to decode: avoid building a pool without workers
    if (num_has_value == 0) {
        return;
    }
    // thread pool
    ThreadPool pool(std::min(num_processes, num_has_value));
    // enqueue the tasks of decoding
    std::vector<std::future<void>> res;
    for (size_t i = 0; i < batch_size; ++i) {
        if (has_value[i] == "true") {
            res.emplace_back(pool.enqueue(
                ctc_beam_search_decode_chunk,
                std::ref(this->decoder_storage_vector[i]->root),
                std::ref(this->decoder_storage_vector[i]->prefixes),
                probs_split[i],
                this->vocabulary,
                this->beam_size,
                this->cutoff_prob,
                this->cutoff_top_n,
                this->ext_scorer,
                this->blank_id));
        }
    }
    // `res` holds one future per decoded sample, which can be fewer than
    // batch_size, so wait on exactly the futures that were created
    for (auto &task : res) {
        task.get();
    }
    return;
 }
 /**
 * Return
 * batch_result: shape[B, beam_size,(-approx_ctc score, string)]
 */
 std::vector<std::vector<std::pair<double, std::string>>>
 CtcBeamSearchDecoderBatch::decode() {
    VALID_CHECK_GT(
        this->num_processes, 0, "num_processes must be nonnegative!");
    // thread pool
    ThreadPool pool(this->num_processes);
    // number of samples
    // enqueue the tasks of decoding
    std::vector<std::future<std::vector<std::pair<double, std::string>>>> res;
    for (size_t i = 0; i < this->batch_size; ++i) {
        res.emplace_back(
            pool.enqueue(get_decode_result,
                         std::ref(this->decoder_storage_vector[i]->prefixes),
                         this->vocabulary,
                         this->beam_size,
                         this->ext_scorer));
    }
    // get decoding results
    std::vector<std::vector<std::pair<double, std::string>>> batch_results;
    for (size_t i = 0; i < this->batch_size; ++i) {
        batch_results.emplace_back(res[i].get());
    }
    return batch_results;
 }
 /**
 * reset the state of ctcBeamSearchDecoderBatch
 */
 void CtcBeamSearchDecoderBatch::reset_state(size_t batch_size,
                                            size_t beam_size,
                                            size_t num_processes,
                                            double cutoff_prob,
                                            size_t cutoff_top_n) {
    this->batch_size = batch_size;
    this->beam_size = beam_size;
    this->num_processes = num_processes;
    this->cutoff_prob = cutoff_prob;
    this->cutoff_top_n = cutoff_top_n;
    VALID_CHECK_GT(this->beam_size, 0, "beam_size must be greater than 0!");
    VALID_CHECK_GT(
        this->num_processes, 0, "num_processes must be nonnegative!");
    // thread pool
    ThreadPool pool(this->num_processes);
    // number of samples
    // enqueue the tasks of decoding
    std::vector<std::future<void>> res;
    size_t storage_size = decoder_storage_vector.size();
    for (size_t i = 0; i < storage_size; i++) {
        res.emplace_back(pool.enqueue(
            free_storage, std::ref(this->decoder_storage_vector[i])));
    }
    for (size_t i = 0; i < storage_size; ++i) {
        res[i].get();
    }
    std::vector<std::unique_ptr<CtcBeamSearchDecoderStorage>>().swap(
        decoder_storage_vector);
    for (size_t i = 0; i < this->batch_size; i++) {
        this->decoder_storage_vector.push_back(
            std::unique_ptr<CtcBeamSearchDecoderStorage>(
                new CtcBeamSearchDecoderStorage()));
        ctc_beam_search_decode_chunk_begin(
            this->decoder_storage_vector[i]->root, this->ext_scorer);
    }
 }
--- a/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h
@ -0,0 +1,175 @@
 // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifndef CTC_BEAM_SEARCH_DECODER_H_
 #define CTC_BEAM_SEARCH_DECODER_H_
 #include <string>
 #include <utility>
 #include <vector>
 #include "scorer.h"
 /* CTC Beam Search Decoder
 * Parameters:
 *     probs_seq: 2-D vector in which each element is a vector of
 *               probabilities over the vocabulary for one time step.
 *     vocabulary: A vector of vocabulary.
 *     beam_size: The width of beam search.
 *     cutoff_prob: Cutoff probability for pruning.
 *     cutoff_top_n: Cutoff number for pruning.
 *     ext_scorer: External scorer to evaluate a prefix, which consists of
 *                 n-gram language model scoring and word insertion term.
 *                 Default null, decoding the input sample without scorer.
 *     blank_id: Index of the CTC blank symbol in each probability vector.
 *               Default 0.
 * Return:
 *     A vector in which each element is a pair of score and decoding result,
 *     in descending order.
 */
 std::vector<std::pair<double, std::string>> ctc_beam_search_decoding(
    const std::vector<std::vector<double>> &probs_seq,
    const std::vector<std::string> &vocabulary,
    size_t beam_size,
    double cutoff_prob = 1.0,
    size_t cutoff_top_n = 40,
    Scorer *ext_scorer = nullptr,
    size_t blank_id = 0);
 /* CTC Beam Search Decoder for batch data
 * Parameters:
 *     probs_split: 3-D vector in which each element is a 2-D vector that can
 *                  be used by ctc_beam_search_decoding().
 *     vocabulary: A vector of vocabulary.
 *     beam_size: The width of beam search.
 *     num_processes: Number of threads for beam search; must be greater
 *                    than 0.
 *     cutoff_prob: Cutoff probability for pruning.
 *     cutoff_top_n: Cutoff number for pruning.
 *     ext_scorer: External scorer to evaluate a prefix, which consists of
 *                 n-gram language model scoring and word insertion term.
 *                 Default null, decoding the input sample without scorer.
 *     blank_id: Index of the CTC blank symbol in each probability vector.
 *               Default 0.
 * Return:
 *     A 2-D vector in which each element is a vector of beam search decoding
 *     results for one audio sample.
 */
 std::vector<std::vector<std::pair<double, std::string>>>
 ctc_beam_search_decoding_batch(
    const std::vector<std::vector<std::vector<double>>> &probs_split,
    const std::vector<std::string> &vocabulary,
    size_t beam_size,
    size_t num_processes,
    double cutoff_prob = 1.0,
    size_t cutoff_top_n = 40,
    Scorer *ext_scorer = nullptr,
    size_t blank_id = 0);
 /**
 * Store the root and prefixes for decoder.
 *
 * The object owns `root`: it is allocated in the constructor and deleted in
 * the destructor. Copying is disabled because two copies would delete the
 * same root.
 */
 class CtcBeamSearchDecoderStorage {
  public:
    // root of the prefix trie, owned by this object
    PathTrie *root = nullptr;
    // current beam; starts out holding only the root
    std::vector<PathTrie *> prefixes;
    CtcBeamSearchDecoderStorage() {
        // init prefixes' root
        this->root = new PathTrie();
        this->root->log_prob_b_prev = 0.0;
        // The score of root is in log scale. Since the prob=1.0, the prob
        // score in log scale is 0.0
        this->root->score = root->log_prob_b_prev;
        this->prefixes.push_back(root);
    }
    // non-copyable: an implicit copy would share and double-delete `root`
    CtcBeamSearchDecoderStorage(const CtcBeamSearchDecoderStorage &) = delete;
    CtcBeamSearchDecoderStorage &operator=(
        const CtcBeamSearchDecoderStorage &) = delete;
    ~CtcBeamSearchDecoderStorage() {
        // deleting a null pointer is a no-op, so no check is needed
        delete root;
        root = nullptr;
    }
 };
 /**
 * The ctc beam search decoder, support batchsize >= 1.
 *
 * Keeps one CtcBeamSearchDecoderStorage per sample so that decoding can be
 * continued chunk by chunk through next().
 */
 class CtcBeamSearchDecoderBatch {
  public:
    // Stores the configuration and creates one storage object per sample.
    // beam_size and num_processes must be greater than 0.
    CtcBeamSearchDecoderBatch(const std::vector<std::string> &vocabulary,
                              size_t batch_size,
                              size_t beam_size,
                              size_t num_processes,
                              double cutoff_prob,
                              size_t cutoff_top_n,
                              Scorer *ext_scorer,
                              size_t blank_id);
    ~CtcBeamSearchDecoderBatch();
    // Decodes the next chunk; probs_split has shape [B, T, D] and only the
    // samples whose has_value entry is the string "true" are processed.
    void next(const std::vector<std::vector<std::vector<double>>> &probs_split,
              const std::vector<std::string> &has_value);
    // Returns one get_decode_result() output per sample.
    std::vector<std::vector<std::pair<double, std::string>>> decode();
    // Replaces the configuration and recreates the per-sample storage.
    void reset_state(size_t batch_size,
                     size_t beam_size,
                     size_t num_processes,
                     double cutoff_prob,
                     size_t cutoff_top_n);
  private:
    std::vector<std::string> vocabulary;
    size_t batch_size;
    size_t beam_size;
    // number of worker threads used by the thread pools
    size_t num_processes;
    double cutoff_prob;
    size_t cutoff_top_n;
    // optional external scorer, may be nullptr; only the pointer is stored
    Scorer *ext_scorer;
    // index of the CTC blank symbol
    size_t blank_id;
    // one storage object (trie root and beam) per sample
    std::vector<std::unique_ptr<CtcBeamSearchDecoderStorage>>
        decoder_storage_vector;
 };
 /**
 * function for chunk decoding
 *
 * Runs the prefix beam search over the time steps in probs_seq, starting
 * from the trie `root` and the beam `prefixes` supplied by the caller.
 * `prefixes` is updated in place with the prefixes that survive each step.
 */
 void ctc_beam_search_decode_chunk(
    PathTrie *root,
    std::vector<PathTrie *> &prefixes,
    const std::vector<std::vector<double>> &probs_seq,
    const std::vector<std::string> &vocabulary,
    size_t beam_size,
    double cutoff_prob,
    size_t cutoff_top_n,
    Scorer *ext_scorer,
    size_t blank_id);
 /**
 * build the decoding result for the current beam of a chunked search
 *
 * The first min(prefixes.size(), beam_size) entries of `prefixes` are sorted
 * in place and their approx_ctc field is updated.
 */
 std::vector<std::pair<double, std::string>> get_decode_result(
    std::vector<PathTrie *> &prefixes,
    const std::vector<std::string> &vocabulary,
    size_t beam_size,
    Scorer *ext_scorer);
 /**
 * free the CtcBeamSearchDecoderStorage
 *
 * Sets `storage` to nullptr, which destroys the object it owned.
 */
 void free_storage(std::unique_ptr<CtcBeamSearchDecoderStorage> &storage);
 /**
 * initialize the root
 *
 * When ext_scorer is not null and not character based, a copy of its
 * dictionary FST and a matcher on that copy are attached to `root`;
 * otherwise `root` is left untouched.
 */
 void ctc_beam_search_decode_chunk_begin(PathTrie *root, Scorer *ext_scorer);
 #endif  // CTC_BEAM_SEARCH_DECODER_H_
--- a/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp
@ -0,0 +1,61 @@
 // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "ctc_greedy_decoder.h"
 #include "decoder_utils.h"
 // Greedy (best path) CTC decoding: take the most probable label of every
 // time step, collapse consecutive repeats, drop the blank label and join the
 // remaining vocabulary entries (the "<space>" token becomes a real space).
 std::string ctc_greedy_decoding(
    const std::vector<std::vector<double>> &probs_seq,
    const std::vector<std::string> &vocabulary,
    size_t blank_id) {
    // Every time step must provide exactly one probability per vocabulary
    // entry.
    const size_t frame_count = probs_seq.size();
    for (size_t i = 0; i < frame_count; ++i) {
        VALID_CHECK_EQ(probs_seq[i].size(),
                       vocabulary.size(),
                       "The shape of probs_seq does not match with "
                       "the shape of the vocabulary");
    }

    // Per-frame argmax with consecutive duplicates collapsed on the fly.
    std::vector<size_t> collapsed;
    size_t previous_best = 0;
    for (size_t t = 0; t < frame_count; ++t) {
        const std::vector<double> &frame = probs_seq[t];
        size_t best = 0;
        double best_prob = 0.0;
        for (size_t k = 0; k < frame.size(); ++k) {
            if (frame[k] > best_prob) {
                best = k;
                best_prob = frame[k];
            }
        }
        if (t == 0 || best != previous_best) {
            collapsed.push_back(best);
        }
        previous_best = best;
    }

    // Map the surviving labels to text, skipping blanks.
    std::string best_path_result;
    for (const size_t label : collapsed) {
        if (label == blank_id) {
            continue;
        }
        const std::string &token = vocabulary[label];
        best_path_result += (token == kSPACE) ? tSPACE : token;
    }
    return best_path_result;
 }
--- a/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h
@ -0,0 +1,35 @@
 // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifndef CTC_GREEDY_DECODER_H
 #define CTC_GREEDY_DECODER_H
 #include <string>
 #include <vector>
 /* CTC Greedy (Best Path) Decoder
 *
 * Picks the most probable label of every time step, collapses consecutive
 * repeated labels, removes the blank label and concatenates the remaining
 * vocabulary entries.
 *
 * Parameters:
 *     probs_seq: 2-D vector that each element is a vector of probabilities
 *               over vocabulary of one time step. Every inner vector must
 *               have the same size as `vocabulary`.
 *     vocabulary: A vector of vocabulary.
 *     blank_id: Index of the CTC blank label; labels equal to it are left
 *               out of the result.
 * Return:
 *     The decoding result in string
 */
 std::string ctc_greedy_decoding(
    const std::vector<std::vector<double>>& probs_seq,
    const std::vector<std::string>& vocabulary,
    size_t blank_id);
 #endif  // CTC_GREEDY_DECODER_H
--- a/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp
@ -0,0 +1,193 @@
 // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "decoder_utils.h"
 #include <algorithm>
 #include <cmath>
 #include <limits>
 // Prune the probabilities of one time step and return the survivors as
 // (vocabulary index, natural-log probability) pairs.
 //
 //   prob_step:    probabilities over the vocabulary for one time step.
 //   cutoff_prob:  when < 1.0, keep the most probable entries until their
 //                 cumulative probability reaches this value.
 //   cutoff_top_n: upper bound on the number of entries kept.
 //
 // When no pruning applies (cutoff_prob >= 1.0 and cutoff_top_n covers the
 // whole vocabulary) the entries are returned in their original order;
 // otherwise they are ordered by descending probability.
 std::vector<std::pair<size_t, float>> get_pruned_log_probs(
    const std::vector<double> &prob_step,
    double cutoff_prob,
    size_t cutoff_top_n) {
    // Pair every probability with its index so the index survives sorting.
    std::vector<std::pair<int, double>> prob_idx;
    prob_idx.reserve(prob_step.size());
    for (size_t i = 0; i < prob_step.size(); ++i) {
        prob_idx.push_back(
            std::pair<int, double>(static_cast<int>(i), prob_step[i]));
    }

    // pruning of vocabulary
    size_t cutoff_len = prob_step.size();
    if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) {
        std::sort(prob_idx.begin(),
                  prob_idx.end(),
                  [](const std::pair<int, double> &a,
                     const std::pair<int, double> &b) {
                      return a.second > b.second;
                  });
        if (cutoff_prob < 1.0) {
            double cum_prob = 0.0;
            cutoff_len = 0;
            for (size_t i = 0; i < prob_idx.size(); ++i) {
                cum_prob += prob_idx[i].second;
                cutoff_len += 1;
                if (cum_prob >= cutoff_prob || cutoff_len >= cutoff_top_n)
                    break;
            }
        } else {
            // Only the top-n limit is active. This branch is reached only
            // when cutoff_top_n < prob_step.size(), so the new length is in
            // range. Previously the limit was silently ignored here.
            cutoff_len = cutoff_top_n;
        }
        prob_idx.resize(cutoff_len);
    }

    // Small positive offset (same value as NUM_FLT_MIN) keeps log() finite
    // for zero probabilities.
    const double log_floor = std::numeric_limits<float>::min();
    std::vector<std::pair<size_t, float>> log_prob_idx;
    log_prob_idx.reserve(prob_idx.size());
    for (const std::pair<int, double> &entry : prob_idx) {
        log_prob_idx.emplace_back(
            static_cast<size_t>(entry.first),
            static_cast<float>(std::log(entry.second + log_floor)));
    }
    return log_prob_idx;
 }
 // Turn the first `beam_size` prefixes into (score, text) pairs, ordered by
 // prefix_compare. The reported score is the negated `approx_ctc` of the
 // prefix; the "<space>" token is rendered as a real space.
 std::vector<std::pair<double, std::string>> get_beam_search_result(
    const std::vector<PathTrie *> &prefixes,
    const std::vector<std::string> &vocabulary,
    size_t beam_size) {
    // Work on a copy of the leading prefixes so the caller's order is kept.
    const size_t kept = std::min(beam_size, prefixes.size());
    std::vector<PathTrie *> candidates(prefixes.begin(),
                                       prefixes.begin() + kept);
    std::sort(candidates.begin(), candidates.end(), prefix_compare);

    std::vector<std::pair<double, std::string>> output_vecs;
    for (PathTrie *candidate : candidates) {
        std::vector<int> labels;
        candidate->get_path_vec(labels);
        // convert index to string
        std::string text;
        for (const int label : labels) {
            const std::string &token = vocabulary[label];
            text += (token == kSPACE) ? tSPACE : token;
        }
        output_vecs.emplace_back(-candidate->approx_ctc, text);
    }
    return output_vecs;
 }
 // Count the UTF-8 characters in `str`: every byte that is not a
 // continuation byte (bit pattern 10xxxxxx) starts a new character.
 size_t get_utf8_str_len(const std::string &str) {
    return static_cast<size_t>(
        std::count_if(str.begin(), str.end(), [](char byte) {
            return (byte & 0xc0) != 0x80;
        }));
 }
 // Split `str` into its UTF-8 characters (one std::string per character).
 // An empty input yields a single empty string, because the trailing piece
 // is always appended.
 std::vector<std::string> split_utf8_str(const std::string &str) {
    std::vector<std::string> pieces;
    std::string current;
    for (const char byte : str) {
        // A non-continuation byte opens a new character: flush the previous
        // one first.
        const bool starts_character = (byte & 0xc0) != 0x80;
        if (starts_character && !current.empty()) {
            pieces.push_back(current);
            current.clear();
        }
        current.push_back(byte);
    }
    pieces.push_back(current);
    return pieces;
 }
 // Split `s` on every occurrence of `delim`. Empty pieces are dropped, so
 // leading, trailing and repeated delimiters do not produce empty strings
 // (e.g. "FooBarFoo" split on "Foo" gives ["Bar"]).
 //
 // An empty delimiter cannot split anything: the whole string is returned as
 // a single piece (or nothing when `s` is empty). Without this guard the
 // search position would never advance and the loop would not terminate.
 std::vector<std::string> split_str(const std::string &s,
                                   const std::string &delim) {
    std::vector<std::string> result;
    if (delim.empty()) {
        if (!s.empty()) {
            result.push_back(s);
        }
        return result;
    }

    const std::size_t delim_len = delim.size();
    std::size_t start = 0;
    while (start < s.size()) {
        const std::size_t end = s.find(delim, start);
        if (end == std::string::npos) {
            // no further delimiter: the rest of the string is the last piece
            result.push_back(s.substr(start));
            break;
        }
        if (end > start) {
            result.push_back(s.substr(start, end - start));
        }
        start = end + delim_len;
    }
    return result;
 }
 // Ordering used when sorting prefixes: higher score first; prefixes with
 // equal scores are ordered by ascending last character.
 bool prefix_compare(const PathTrie *x, const PathTrie *y) {
    if (x->score != y->score) {
        return x->score > y->score;
    }
    // Equal scores: fall back to the character; equal characters compare
    // as "not less".
    return x->character < y->character;
 }
 void add_word_to_fst(const std::vector<int> &word,
                     fst::StdVectorFst *dictionary) {
    if (dictionary->NumStates() == 0) {
        fst::StdVectorFst::StateId start = dictionary->AddState();
        assert(start == 0);
        dictionary->SetStart(start);
    }
    fst::StdVectorFst::StateId src = dictionary->Start();
    fst::StdVectorFst::StateId dst;
    for (auto c : word) {
        dst = dictionary->AddState();
        dictionary->AddArc(src, fst::StdArc(c, c, 0, dst));
        src = dst;
    }
    dictionary->SetFinal(dst, fst::StdArc::Weight::One());
 }
 // Translate `word` into labels via `char_map` and add it to the dictionary
 // FST. A literal space maps to SPACE_ID; when `add_space` is set, SPACE_ID
 // is also appended after the word. Returns false, without touching the
 // dictionary, if any character is missing from `char_map`.
 bool add_word_to_dictionary(
    const std::string &word,
    const std::unordered_map<std::string, int> &char_map,
    bool add_space,
    int SPACE_ID,
    fst::StdVectorFst *dictionary) {
    std::vector<int> labels;
    for (const std::string &ch : split_utf8_str(word)) {
        if (ch == " ") {
            labels.push_back(SPACE_ID);
            continue;
        }
        const auto found = char_map.find(ch);
        if (found == char_map.end()) {
            return false;  // unknown character: skip the whole word
        }
        labels.push_back(found->second);
    }
    if (add_space) {
        labels.push_back(SPACE_ID);
    }
    add_word_to_fst(labels, dictionary);
    return true;  // word was added
 }
--- a/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h
@ -0,0 +1,111 @@
 // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifndef DECODER_UTILS_H_
 #define DECODER_UTILS_H_
 #include <string>
 #include <utility>
 #include "fst/log.h"
 #include "path_trie.h"
 // Vocabulary token that stands for a space, and the text it is replaced
 // with when labels are converted to a string.
 const std::string kSPACE = "<space>";
 const std::string tSPACE = " ";
 // Largest finite float; its negation is used as the initial value of log
 // probabilities and scores in PathTrie.
 const float NUM_FLT_INF = std::numeric_limits<float>::max();
 // Smallest positive normalized float; added to probabilities before log()
 // in get_pruned_log_probs.
 const float NUM_FLT_MIN = std::numeric_limits<float>::min();
 // Validation helper behind the VALID_CHECK* macros: when `x` is false,
 // print the source location to stdout and log a FATAL message containing
 // the failed expression text and `err`. Does nothing when `x` is true.
 inline void check(
    bool x, const char *expr, const char *file, int line, const char *err) {
    if (x) {
        return;
    }
    std::cout << "[" << file << ":" << line << "] ";
    LOG(FATAL) << "\"" << expr << "\" check failed. " << err;
 }
 // VALID_CHECK(x, info): evaluate `x` as a bool and forward it to check()
 // together with the stringized expression, the current file and line, and
 // the message `info`.
 #define VALID_CHECK(x, info) \
    check(static_cast<bool>(x), #x, __FILE__, __LINE__, info)
 // Comparison shortcuts built on VALID_CHECK: equal, greater-than, less-than.
 #define VALID_CHECK_EQ(x, y, info) VALID_CHECK((x) == (y), info)
 #define VALID_CHECK_GT(x, y, info) VALID_CHECK((x) > (y), info)
 #define VALID_CHECK_LT(x, y, info) VALID_CHECK((x) < (y), info)
 // Comparator for descending order by the first element of a pair: returns
 // true when `lhs.first` is greater than `rhs.first`.
 template <typename T1, typename T2>
 bool pair_comp_first_rev(const std::pair<T1, T2> &lhs,
                         const std::pair<T1, T2> &rhs) {
    const bool lhs_comes_first = lhs.first > rhs.first;
    return lhs_comes_first;
 }
 // Comparator for descending order by the second element of a pair: returns
 // true when `lhs.second` is greater than `rhs.second`.
 template <typename T1, typename T2>
 bool pair_comp_second_rev(const std::pair<T1, T2> &lhs,
                          const std::pair<T1, T2> &rhs) {
    const bool lhs_comes_first = lhs.second > rhs.second;
    return lhs_comes_first;
 }
 // Return log(exp(x) + exp(y)), i.e. the sum of two probabilities given in
 // log scale. A value at or below -max(T) is treated as log(0), in which
 // case the other operand is returned unchanged.
 template <typename T>
 T log_sum_exp(const T &x, const T &y) {
    static const T log_zero = -std::numeric_limits<T>::max();
    if (x <= log_zero) {
        return y;
    }
    if (y <= log_zero) {
        return x;
    }
    // Factor out the larger operand so the exponentials cannot overflow.
    const T pivot = std::max(x, y);
    const T scaled_sum = std::exp(x - pivot) + std::exp(y - pivot);
    return std::log(scaled_sum) + pivot;
 }
 // Get pruned probability vector for each time step's beam search.
 // Returns (vocabulary index, log probability) pairs for the entries of
 // `prob_step` that survive pruning controlled by `cutoff_prob` (cumulative
 // probability threshold) and `cutoff_top_n` (entry count limit).
 std::vector<std::pair<size_t, float>> get_pruned_log_probs(
    const std::vector<double> &prob_step,
    double cutoff_prob,
    size_t cutoff_top_n);
 // Get beam search result from prefixes in trie tree.
 // Considers at most the first `beam_size` entries of `prefixes`, sorts them
 // with prefix_compare and returns one (score, text) pair per prefix, where
 // the score is the negated `approx_ctc` of the prefix.
 std::vector<std::pair<double, std::string>> get_beam_search_result(
    const std::vector<PathTrie *> &prefixes,
    const std::vector<std::string> &vocabulary,
    size_t beam_size);
 // Comparison function for prefixes: orders by descending score, and by
 // ascending character when the scores are equal.
 bool prefix_compare(const PathTrie *x, const PathTrie *y);
 /* Get the length, in characters, of a UTF-8 encoded string by counting the
 * bytes that are not continuation bytes.
 * See: http://stackoverflow.com/a/4063229
 */
 size_t get_utf8_str_len(const std::string &str);
 /* Split a string into a list of strings on a given string
 * delimiter. NB: delimiters on beginning / end of string are
 * trimmed, and empty pieces between adjacent delimiters are dropped.
 * Eg, "FooBarFoo" split on "Foo" returns ["Bar"].
 */
 std::vector<std::string> split_str(const std::string &s,
                                   const std::string &delim);
 /* Splits string into vector of strings representing
 * UTF-8 characters (not same as chars): each element holds the bytes of
 * exactly one character.
 */
 std::vector<std::string> split_utf8_str(const std::string &str);
 // Add a word, given as a sequence of label indices, to the dictionary FST.
 // The word becomes a chain of new states starting at the FST's start state,
 // with the last state marked final.
 void add_word_to_fst(const std::vector<int> &word,
                     fst::StdVectorFst *dictionary);
 // Add a word, given as a string, to the dictionary FST.
 // Each UTF-8 character is mapped to a label through `char_map` (a literal
 // space maps to SPACE_ID); when `add_space` is true SPACE_ID is appended
 // after the word. Returns false and adds nothing if a character is not in
 // `char_map`, true otherwise.
 bool add_word_to_dictionary(
    const std::string &word,
    const std::unordered_map<std::string, int> &char_map,
    bool add_space,
    int SPACE_ID,
    fst::StdVectorFst *dictionary);
 #endif  // DECODER_UTILS_H_
--- a/speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp
@ -0,0 +1,164 @@
 // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "path_trie.h"
 #include <algorithm>
 #include <limits>
 #include <memory>
 #include <utility>
 #include <vector>
 #include "decoder_utils.h"
 // Construct a root node: all log probabilities and scores start at
 // -NUM_FLT_INF, the character is the root marker (-1), the node is marked
 // as existing and no dictionary or matcher is attached.
 PathTrie::PathTrie() {
    log_prob_b_prev = -NUM_FLT_INF;
    log_prob_nb_prev = -NUM_FLT_INF;
    log_prob_b_cur = -NUM_FLT_INF;
    log_prob_nb_cur = -NUM_FLT_INF;
    score = -NUM_FLT_INF;
    // Previously left uninitialized although get_beam_search_result reads
    // it; start from the same value as `score`.
    approx_ctc = -NUM_FLT_INF;

    ROOT_ = -1;
    character = ROOT_;
    exists_ = true;
    parent = nullptr;

    dictionary_ = nullptr;
    dictionary_state_ = 0;
    has_dictionary_ = false;

    matcher_ = nullptr;
 }
 // A node owns its children: destroying it deletes the whole subtree.
 PathTrie::~PathTrie() {
    for (const auto& entry : children_) {
        delete entry.second;
    }
 }
 // Return the child reached by appending `new_char` to this prefix.
 //  - An existing child is reused; if it had been removed logically
 //    (exists_ == false) it is revived with its log probabilities reset.
 //  - Without a dictionary a new child is always created.
 //  - With a dictionary a child is only created when the FST has an arc for
 //    label `new_char + 1` from the current dictionary state; otherwise
 //    nullptr is returned, and if the current state is final and `reset` is
 //    true the dictionary state goes back to the FST start state.
 PathTrie* PathTrie::get_path_trie(int new_char, bool reset) {
    const auto existing = std::find_if(
        children_.begin(),
        children_.end(),
        [new_char](const std::pair<int, PathTrie*>& entry) {
            return entry.first == new_char;
        });
    if (existing != children_.end()) {
        PathTrie* node = existing->second;
        if (!node->exists_) {
            node->exists_ = true;
            node->log_prob_b_prev = -NUM_FLT_INF;
            node->log_prob_nb_prev = -NUM_FLT_INF;
            node->log_prob_b_cur = -NUM_FLT_INF;
            node->log_prob_nb_cur = -NUM_FLT_INF;
        }
        return node;
    }

    if (!has_dictionary_) {
        PathTrie* node = new PathTrie;
        node->character = new_char;
        node->parent = this;
        children_.push_back(std::make_pair(new_char, node));
        return node;
    }

    matcher_->SetState(dictionary_state_);
    if (!matcher_->Find(new_char + 1)) {
        // Adding this character causes word outside dictionary
        const auto fst_zero = fst::TropicalWeight::Zero();
        const auto final_weight = dictionary_->Final(dictionary_state_);
        const bool is_final = (final_weight != fst_zero);
        if (is_final && reset) {
            dictionary_state_ = dictionary_->Start();
        }
        return nullptr;
    }

    PathTrie* node = new PathTrie;
    node->character = new_char;
    node->parent = this;
    node->dictionary_ = dictionary_;
    node->dictionary_state_ = matcher_->Value().nextstate;
    node->has_dictionary_ = true;
    node->matcher_ = matcher_;
    children_.push_back(std::make_pair(new_char, node));
    return node;
 }
 // Convenience overload: collect the labels on the path to this node using
 // the root marker as stop label and the default (unlimited) step count.
 // Returns the node at which the walk stopped.
 PathTrie* PathTrie::get_path_vec(std::vector<int>& output) {
    return get_path_vec(output, ROOT_);
 }
 // Walk from this node towards the root, appending each node's character to
 // `output`, until a node carries the `stop` label or the root marker, or
 // `output` holds `max_steps` entries. `output` is then reversed so it reads
 // in root-to-leaf order, and the node where the walk stopped is returned.
 PathTrie* PathTrie::get_path_vec(std::vector<int>& output,
                                 int stop,
                                 size_t max_steps) {
    PathTrie* node = this;
    while (!(node->character == stop || node->character == node->ROOT_ ||
             output.size() == max_steps)) {
        output.push_back(node->character);
        node = node->parent;
    }
    std::reverse(output.begin(), output.end());
    return node;
 }
 // Depth-first pass over the subtree. For every existing node the current
 // log probabilities become the previous ones, the current ones are reset,
 // the score is recomputed from the previous pair, and the node is appended
 // to `output`.
 void PathTrie::iterate_to_vec(std::vector<PathTrie*>& output) {
    if (exists_) {
        log_prob_b_prev = log_prob_b_cur;
        log_prob_nb_prev = log_prob_nb_cur;

        log_prob_b_cur = -NUM_FLT_INF;
        log_prob_nb_cur = -NUM_FLT_INF;

        score = log_sum_exp(log_prob_b_prev, log_prob_nb_prev);
        output.push_back(this);
    }
    for (const auto& entry : children_) {
        entry.second->iterate_to_vec(output);
    }
 }
 void PathTrie::remove() {
    exists_ = false;
    if (children_.size() == 0) {
        if (parent != nullptr) {
            auto child = parent->children_.begin();
            for (child = parent->children_.begin();
                 child != parent->children_.end();
                 ++child) {
                if (child->first == character) {
                    parent->children_.erase(child);
                    break;
                }
            }
            if (parent->children_.size() == 0 && !parent->exists_) {
                parent->remove();
            }
        }
        delete this;
    }
 }
 // Attach a dictionary FST to this node and position the node at the FST's
 // start state. The pointer is stored as-is; it is not copied.
 void PathTrie::set_dictionary(fst::StdVectorFst* dictionary) {
    dictionary_ = dictionary;
    dictionary_state_ = dictionary_->Start();
    has_dictionary_ = true;
 }
 using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;
 // Store the shared matcher used by get_path_trie for dictionary lookups.
 void PathTrie::set_matcher(std::shared_ptr<FSTMATCH> matcher) {
    // The argument is already a private copy, so hand it over directly.
    matcher_ = std::move(matcher);
 }
--- a/speechx/speechx/asr/decoder/ctc_decoders/path_trie.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.h
@ -0,0 +1,82 @@
 // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifndef PATH_TRIE_H
 #define PATH_TRIE_H
 #include <algorithm>
 #include <limits>
 #include <memory>
 #include <utility>
 #include <vector>
 #include "fst/fstlib.h"
 /* Trie tree for prefix storing and manipulating, with a dictionary in
 * finite-state transducer for spelling correction.
 *
 * Each node represents one label appended to its parent's prefix. A node
 * owns its children through raw pointers and deletes them in its destructor.
 * NOTE(review): copy construction / assignment are not disabled, so copying
 * a node would lead to double deletion of the children -- confirm no caller
 * copies PathTrie objects.
 */
 class PathTrie {
  public:
    PathTrie();
    ~PathTrie();
    // get new prefix after appending new char; returns nullptr when a
    // dictionary is attached and it has no arc for the character
    PathTrie* get_path_trie(int new_char, bool reset = true);
    // get the prefix in index from root to current node
    PathTrie* get_path_vec(std::vector<int>& output);
    // get the prefix in index from some stop node to current node, walking
    // at most max_steps nodes; returns the node where the walk stopped
    PathTrie* get_path_vec(
        std::vector<int>& output,
        int stop,
        size_t max_steps = std::numeric_limits<size_t>::max());
    // update log probs (current values become previous ones) and append
    // every existing node of the subtree to output
    void iterate_to_vec(std::vector<PathTrie*>& output);
    // set dictionary for FST
    void set_dictionary(fst::StdVectorFst* dictionary);
    // set the shared matcher used to look up arcs in the dictionary FST
    void set_matcher(std::shared_ptr<fst::SortedMatcher<fst::StdVectorFst>>);
    // true if this node is the root (its character is the root marker)
    bool is_empty() { return ROOT_ == character; }
    // remove current path from root; a leaf node deletes itself
    void remove();
    // log probabilities of the previous / current step
    // (presumably "b" = path ending in blank, "nb" = ending in non-blank)
    float log_prob_b_prev;
    float log_prob_nb_prev;
    float log_prob_b_cur;
    float log_prob_nb_cur;
    // combined score, recomputed in iterate_to_vec
    float score;
    // read by get_beam_search_result, which reports its negation
    float approx_ctc;
    // label of this node; equals the root marker for the root
    int character;
    PathTrie* parent;
  private:
    // marker value stored in `character` of the root node (-1)
    int ROOT_;
    // false once the node has been removed logically but is still linked
    bool exists_;
    bool has_dictionary_;
    // owned child nodes, keyed by their label
    std::vector<std::pair<int, PathTrie*>> children_;
    // pointer to dictionary of FST
    fst::StdVectorFst* dictionary_;
    // state in the dictionary FST that corresponds to this prefix
    fst::StdVectorFst::StateId dictionary_state_;
    // matcher used for finding arcs in FST
    std::shared_ptr<fst::SortedMatcher<fst::StdVectorFst>> matcher_;
 };
 #endif  // PATH_TRIE_H
--- a/speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp
+++ b/speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp
@ -0,0 +1,232 @@
 // Licensed under GNU Lesser General Public License v3 (LGPLv3) (LGPL-3) (the
 // "COPYING.LESSER.3");
 #include "scorer.h"
 #include <unistd.h>
 #include <iostream>
 #include "lm/config.hh"
 #include "lm/model.hh"
 #include "lm/state.hh"
 #include "decoder_utils.h"
 using namespace lm::ngram;
 // F_OK (existence test for access()) comes from <unistd.h> on POSIX
 // platforms. Define it only where it is missing, e.g. on Windows.
 #ifndef F_OK
 #define F_OK 0
 #endif
 // Build a scorer with language model weight `alpha` and word insertion
 // weight `beta`. All members start from neutral defaults, then setup()
 // loads the language model at `lm_path` and registers `vocab_list`.
 Scorer::Scorer(double alpha,
               double beta,
               const std::string& lm_path,
               const std::vector<std::string>& vocab_list)
    : alpha(alpha),
      beta(beta),
      dictionary(nullptr),
      language_model_(nullptr),
      is_character_based_(true),
      max_order_(0),
      dict_size_(0),
      SPACE_ID_(-1) {
    setup(lm_path, vocab_list);
 }
 // Release the language model and the dictionary FST. Both are stored as
 // void* and must be cast back to their real types before deletion;
 // deleting a null pointer is a no-op, so no explicit checks are needed.
 Scorer::~Scorer() {
    delete static_cast<lm::base::Model*>(language_model_);
    delete static_cast<fst::StdVectorFst*>(dictionary);
 }
 // One-time initialization: load the language model, register the character
 // list, and build the dictionary FST when the model is word based.
 // The order matters: load_lm() decides whether the model is character
 // based, and fill_dictionary() relies on the char map.
 void Scorer::setup(const std::string& lm_path,
                   const std::vector<std::string>& vocab_list) {
    load_lm(lm_path);
    set_char_map(vocab_list);
    if (is_character_based()) {
        return;  // character based models do not use the FST dictionary
    }
    fill_dictionary(true);
 }
 // Load the language model from `lm_path` (the path must be accessible),
 // record its order and vocabulary, and decide whether it is character
 // based: it stops being so as soon as one token other than <unk>, <s> and
 // </s> is longer than one UTF-8 character.
 void Scorer::load_lm(const std::string& lm_path) {
    const char* filename = lm_path.c_str();
    VALID_CHECK_EQ(access(filename, F_OK), 0, "Invalid language model path");

    // The enumerator collects the vocabulary while the model is loaded.
    RetriveStrEnumerateVocab enumerate;
    lm::ngram::Config config;
    config.enumerate_vocab = &enumerate;
    language_model_ = lm::ngram::LoadVirtual(filename, config);
    max_order_ = static_cast<lm::base::Model*>(language_model_)->Order();
    vocabulary_ = enumerate.vocabulary;

    for (const std::string& token : vocabulary_) {
        if (!is_character_based_) {
            break;  // already known to be word based
        }
        const bool is_special = token == UNK_TOKEN || token == START_TOKEN ||
                                token == END_TOKEN;
        if (!is_special && get_utf8_str_len(token) > 1) {
            is_character_based_ = false;
        }
    }
 }
 // Score an n-gram: feed `words` to the language model one by one, starting
 // from the null context, and return the log10 conditional probability of
 // the last word given the preceding ones.
 // Returns OOV_SCORE as soon as a word is not in the model's vocabulary.
 // For an empty `words` the result is 0.0 (previously an uninitialized
 // value was returned).
 double Scorer::get_log_cond_prob(const std::vector<std::string>& words) {
    lm::base::Model* model = static_cast<lm::base::Model*>(language_model_);
    double cond_prob = 0.0;
    lm::ngram::State state, tmp_state, out_state;
    // avoid to inserting <s> in begin
    model->NullContextWrite(&state);
    for (size_t i = 0; i < words.size(); ++i) {
        lm::WordIndex word_index = model->BaseVocabulary().Index(words[i]);
        // encounter OOV
        if (word_index == 0) {
            return OOV_SCORE;
        }
        cond_prob = model->BaseScore(&state, word_index, &out_state);
        // swap: the state produced by this word is the context of the next
        tmp_state = state;
        state = out_state;
        out_state = tmp_state;
    }
    // return  log10 prob
    return cond_prob;
 }
 // Score a whole sentence: pad it in front with start tokens (max_order_ of
 // them for an empty sentence, max_order_ - 1 otherwise), append the end
 // token and sum the n-gram scores via get_log_prob().
 double Scorer::get_sent_log_prob(const std::vector<std::string>& words) {
    const size_t padding = words.empty() ? max_order_ : max_order_ - 1;

    std::vector<std::string> sentence;
    for (size_t k = 0; k < padding; ++k) {
        sentence.push_back(START_TOKEN);
    }
    sentence.insert(sentence.end(), words.begin(), words.end());
    sentence.push_back(END_TOKEN);
    return get_log_prob(sentence);
 }
 // Sum the conditional log probabilities of every window of max_order_
 // consecutive words in `words`. Expects more words than the model order.
 double Scorer::get_log_prob(const std::vector<std::string>& words) {
    assert(words.size() > max_order_);
    const size_t window_count = words.size() - max_order_ + 1;

    double score = 0.0;
    for (size_t start = 0; start < window_count; ++start) {
        const auto first = words.begin() + start;
        const std::vector<std::string> ngram(first, first + max_order_);
        score += get_log_cond_prob(ngram);
    }
    return score;
 }
 // Replace the language model weight (alpha) and the word insertion weight
 // (beta). The parameters shadow the members, hence the explicit `this->`.
 void Scorer::reset_params(float alpha, float beta) {
    this->beta = beta;
    this->alpha = alpha;
 }
 // Concatenate the char-list entries selected by the indices in `input`.
 // Indices are used as-is; they must be valid positions in char_list_.
 std::string Scorer::vec2str(const std::vector<int>& input) {
    std::string joined;
    for (const int index : input) {
        joined.append(char_list_[index]);
    }
    return joined;
 }
 // Convert label indices to text and split it into the units the language
 // model works on: UTF-8 characters for a character based model, otherwise
 // words separated by spaces. Empty input gives an empty result.
 std::vector<std::string> Scorer::split_labels(const std::vector<int>& labels) {
    if (labels.empty()) {
        return {};
    }
    const std::string text = vec2str(labels);
    return is_character_based_ ? split_utf8_str(text) : split_str(text, " ");
 }
 // Store the character list, remember the index of the "<space>" token and
 // rebuild the character -> FST label map used for spelling correction.
 void Scorer::set_char_map(const std::vector<std::string>& char_list) {
    char_list_ = char_list;
    char_map_.clear();

    for (size_t idx = 0; idx < char_list_.size(); ++idx) {
        const std::string& token = char_list_[idx];
        if (token == kSPACE) {
            SPACE_ID_ = idx;
        }
        // FST labels are shifted by one: state 0 is the initial state of the
        // FST, so labels start from 1 to avoid the conflict with it;
        // otherwise wrong decoding results would be given.
        char_map_[token] = idx + 1;
    }
 }
 // Build the n-gram (max_order_ entries) that ends at `prefix`.
 // The trie is walked from `prefix` towards the root, producing one unit per
 // iteration: a single character for a character based model, or the labels
 // back to the previous space for a word based model. When the root is
 // reached early the n-gram is padded with START_TOKEN. Units are collected
 // last-to-first and reversed before returning.
 std::vector<std::string> Scorer::make_ngram(PathTrie* prefix) {
    std::vector<std::string> ngram;
    PathTrie* current_node = prefix;
    PathTrie* new_node = nullptr;
    for (int order = 0; order < max_order_; order++) {
        std::vector<int> prefix_vec;
        if (is_character_based_) {
            // take exactly one label (max_steps = 1) per unit
            new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_, 1);
            current_node = new_node;
        } else {
            // take all labels back to the previous space (or the root)
            new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_);
            current_node = new_node->parent;  // Skipping spaces
        }
        // reconstruct word
        std::string word = vec2str(prefix_vec);
        ngram.push_back(word);
        // -1 is the character of the root node, i.e. the walk hit the root
        if (new_node->character == -1) {
            // No more spaces, but still need order
            for (int i = 0; i < max_order_ - order - 1; i++) {
                ngram.push_back(START_TOKEN);
            }
            break;
        }
    }
    std::reverse(ngram.begin(), ngram.end());
    return ngram;
 }
 void Scorer::fill_dictionary(bool add_space) {
    fst::StdVectorFst dictionary;
    // For each unigram convert to ints and put in trie
    int dict_size = 0;
    for (const auto& word : vocabulary_) {
        bool added = add_word_to_dictionary(
            word, char_map_, add_space, SPACE_ID_ + 1, &dictionary);
        dict_size += added ? 1 : 0;
    }
    dict_size_ = dict_size;
    /* Simplify FST
     * This gets rid of "epsilon" transitions in the FST.
     * These are transitions that don't require a string input to be taken.
     * Getting rid of them is necessary to make the FST deterministic, but
     * can greatly increase the size of the FST
     */
    fst::RmEpsilon(&dictionary);
    fst::StdVectorFst* new_dict = new fst::StdVectorFst;
    /* This makes the FST deterministic, meaning for any string input there's
     * only one possible state the FST could be in.  It is assumed our
     * dictionary is deterministic when using it.
     * (lest we'd have to check for multiple transitions at each state)
     */
    fst::Determinize(dictionary, new_dict);
    /* Finds the simplest equivalent fst. This is unnecessary but decreases
     * memory usage of the dictionary
     */
    fst::Minimize(new_dict);
    this->dictionary = new_dict;
 }
--- a/speechx/speechx/asr/decoder/ctc_decoders/scorer.h
+++ b/speechx/speechx/asr/decoder/ctc_decoders/scorer.h
@ -0,0 +1,114 @@
 // Licensed under GNU Lesser General Public License v3 (LGPLv3) (LGPL-3) (the
 // "COPYING.LESSER.3");
 #ifndef SCORER_H_
 #define SCORER_H_
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
 #include "lm/enumerate_vocab.hh"
 #include "lm/virtual_interface.hh"
 #include "lm/word_index.hh"
 #include "path_trie.h"
 // Score returned by Scorer::get_log_cond_prob when a word is not in the
 // language model's vocabulary.
 const double OOV_SCORE = -1000.0;
 // Special tokens of the language model: sentence start, unknown word and
 // sentence end.
 const std::string START_TOKEN = "<s>";
 const std::string UNK_TOKEN = "<unk>";
 const std::string END_TOKEN = "</s>";
 // Callback that collects the vocabulary of the language model: every word
 // reported through Add() is appended to `vocabulary` in call order.
 class RetriveStrEnumerateVocab : public lm::EnumerateVocab {
  public:
    RetriveStrEnumerateVocab() {}
    // `index` is not needed; only the word text is stored.
    void Add(lm::WordIndex index, const StringPiece &str) override {
        vocabulary.emplace_back(str.data(), str.length());
    }
    std::vector<std::string> vocabulary;
 };
 /* External scorer to query score for n-gram or sentence, including language
 * model scoring and word insertion.
 *
 * The scorer owns the loaded language model and the dictionary FST and
 * deletes both in its destructor.
 *
 * Example:
 *     Scorer scorer(alpha, beta, "path_of_language_model", vocabulary);
 *     scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" });
 *     scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" });
 */
 class Scorer {
  public:
    // alpha: language model weight, beta: word insertion weight,
    // lm_path: path of the language model file,
    // vocabulary: list of characters / tokens the decoder emits
    Scorer(double alpha,
           double beta,
           const std::string &lm_path,
           const std::vector<std::string> &vocabulary);
    ~Scorer();
    // log10 conditional probability of the last word given the previous
    // ones; OOV_SCORE if a word is unknown to the language model
    double get_log_cond_prob(const std::vector<std::string> &words);
    // log probability of a whole sentence (start / end tokens are added)
    double get_sent_log_prob(const std::vector<std::string> &words);
    // return the max order
    size_t get_max_order() const { return max_order_; }
    // return the dictionary size of language model
    size_t get_dict_size() const { return dict_size_; }
    // return true if the language model is character based
    bool is_character_based() const { return is_character_based_; }
    // reset params alpha & beta
    // (takes float although the members are double)
    void reset_params(float alpha, float beta);
    // make ngram for a given prefix
    std::vector<std::string> make_ngram(PathTrie *prefix);
    // transform the labels in index to the vector of words (word based lm) or
    // the vector of characters (character based lm)
    std::vector<std::string> split_labels(const std::vector<int> &labels);
    // language model weight
    double alpha;
    // word insertion weight
    double beta;
    // pointer to the dictionary of FST (an fst::StdVectorFst stored as
    // void*; null unless the language model is word based)
    void *dictionary;
  protected:
    // necessary setup: load language model, set char map, fill FST's dictionary
    void setup(const std::string &lm_path,
               const std::vector<std::string> &vocab_list);
    // load language model from given path
    void load_lm(const std::string &lm_path);
    // fill dictionary for FST
    void fill_dictionary(bool add_space);
    // set char map
    void set_char_map(const std::vector<std::string> &char_list);
    // sum of the n-gram scores over all windows of max_order_ words
    double get_log_prob(const std::vector<std::string> &words);
    // translate the vector in index to string
    std::string vec2str(const std::vector<int> &input);
  private:
    // the loaded language model (an lm::base::Model stored as void*)
    void *language_model_;
    bool is_character_based_;
    // order of the language model
    size_t max_order_;
    // number of vocabulary words added to the dictionary FST
    size_t dict_size_;
    // index of the "<space>" token in char_list_, -1 if not present
    int SPACE_ID_;
    std::vector<std::string> char_list_;
    // maps a character to its FST label (index in char_list_ plus one)
    std::unordered_map<std::string, int> char_map_;
    // vocabulary of the language model
    std::vector<std::string> vocabulary_;
 };
 #endif  // SCORER_H_
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.cc
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.cc
@ -84,7 +84,7 @@ void CTCPrefixBeamSearch::AdvanceDecode(
        timer.Reset();
        std::vector<std::vector<kaldi::BaseFloat>> likelihood;
-        likelihood.push_back(frame_prob);
+        likelihood.push_back(std::move(frame_prob));
        AdvanceDecoding(likelihood);
        search_cost += timer.Elapsed();
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.h
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_score.h
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_score.h
--- a/speechx/speechx/asr/decoder/ctc_tlg_decoder.cc
+++ b/speechx/speechx/asr/decoder/ctc_tlg_decoder.cc
--- a/speechx/speechx/asr/decoder/ctc_tlg_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_tlg_decoder.h
--- a/speechx/speechx/asr/decoder/ctc_tlg_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/ctc_tlg_decoder_main.cc
--- a/speechx/speechx/asr/decoder/decoder_itf.h
+++ b/speechx/speechx/asr/decoder/decoder_itf.h
--- a/speechx/speechx/asr/decoder/nnet_logprob_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/nnet_logprob_decoder_main.cc
--- a/speechx/speechx/asr/decoder/param.h
+++ b/speechx/speechx/asr/decoder/param.h
--- a/speechx/speechx/asr/nnet/CMakeLists.txt
+++ b/speechx/speechx/asr/nnet/CMakeLists.txt
--- a/speechx/speechx/asr/nnet/decodable.cc
+++ b/speechx/speechx/asr/nnet/decodable.cc
--- a/speechx/speechx/asr/nnet/decodable.h
+++ b/speechx/speechx/asr/nnet/decodable.h
--- a/speechx/speechx/asr/nnet/ds2_nnet.cc
+++ b/speechx/speechx/asr/nnet/ds2_nnet.cc
--- a/speechx/speechx/asr/nnet/ds2_nnet.h
+++ b/speechx/speechx/asr/nnet/ds2_nnet.h
--- a/speechx/speechx/asr/nnet/ds2_nnet_main.cc
+++ b/speechx/speechx/asr/nnet/ds2_nnet_main.cc
--- a/speechx/speechx/asr/nnet/nnet_itf.h
+++ b/speechx/speechx/asr/nnet/nnet_itf.h
--- a/speechx/speechx/asr/nnet/u2_nnet.cc
+++ b/speechx/speechx/asr/nnet/u2_nnet.cc
--- a/speechx/speechx/asr/nnet/u2_nnet.h
+++ b/speechx/speechx/asr/nnet/u2_nnet.h
--- a/speechx/speechx/asr/nnet/u2_nnet_main.cc
+++ b/speechx/speechx/asr/nnet/u2_nnet_main.cc
--- a/speechx/speechx/asr/recognizer/CMakeLists.txt
+++ b/speechx/speechx/asr/recognizer/CMakeLists.txt
--- a/speechx/speechx/asr/recognizer/recognizer.cc
+++ b/speechx/speechx/asr/recognizer/recognizer.cc
--- a/speechx/speechx/asr/recognizer/recognizer.h
+++ b/speechx/speechx/asr/recognizer/recognizer.h
--- a/speechx/speechx/asr/recognizer/recognizer_main.cc
+++ b/speechx/speechx/asr/recognizer/recognizer_main.cc
--- a/speechx/speechx/asr/recognizer/u2_recognizer.cc
+++ b/speechx/speechx/asr/recognizer/u2_recognizer.cc
--- a/speechx/speechx/asr/recognizer/u2_recognizer.h
+++ b/speechx/speechx/asr/recognizer/u2_recognizer.h
--- a/speechx/speechx/asr/recognizer/u2_recognizer_main.cc
+++ b/speechx/speechx/asr/recognizer/u2_recognizer_main.cc
--- a/speechx/speechx/asr/server/CMakeLists.txt
+++ b/speechx/speechx/asr/server/CMakeLists.txt
--- a/speechx/speechx/asr/server/websocket/CMakeLists.txt
+++ b/speechx/speechx/asr/server/websocket/CMakeLists.txt
--- a/speechx/speechx/asr/server/websocket/websocket_client.cc
+++ b/speechx/speechx/asr/server/websocket/websocket_client.cc
--- a/speechx/speechx/asr/server/websocket/websocket_client.h
+++ b/speechx/speechx/asr/server/websocket/websocket_client.h
--- a/speechx/speechx/asr/server/websocket/websocket_client_main.cc
+++ b/speechx/speechx/asr/server/websocket/websocket_client_main.cc
--- a/speechx/speechx/asr/server/websocket/websocket_server.cc
+++ b/speechx/speechx/asr/server/websocket/websocket_server.cc
--- a/speechx/speechx/asr/server/websocket/websocket_server.h
+++ b/speechx/speechx/asr/server/websocket/websocket_server.h
--- a/speechx/speechx/asr/server/websocket/websocket_server_main.cc
+++ b/speechx/speechx/asr/server/websocket/websocket_server_main.cc
--- a/speechx/speechx/common/CMakeLists.txt
+++ b/speechx/speechx/common/CMakeLists.txt
@ -0,0 +1,16 @@
 # Build script for the common/ tree: sets up directory-scoped include paths
 # and descends into the utils and frontend subdirectories.
 #
 # Expose this directory and its base/ subdirectory as include roots.
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}
 ${CMAKE_CURRENT_SOURCE_DIR}/base
 )
 # Add the parent directory and utils/ to the include path. This is done
 # before add_subdirectory(utils) so that utils inherits these entries.
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}/../
 ${CMAKE_CURRENT_SOURCE_DIR}/utils
 )
 add_subdirectory(utils)
 # Add frontend/ to the include path before descending into it.
 # NOTE(review): ${CMAKE_CURRENT_SOURCE_DIR} is listed again here although it
 # was already added at the top of this file.
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}
 ${CMAKE_CURRENT_SOURCE_DIR}/frontend
 )
 add_subdirectory(frontend)
--- a/speechx/speechx/common/base/basic_types.h
+++ b/speechx/speechx/common/base/basic_types.h
--- a/speechx/speechx/common/base/common.h
+++ b/speechx/speechx/common/base/common.h
--- a/speechx/speechx/common/base/flags.h
+++ b/speechx/speechx/common/base/flags.h
--- a/speechx/speechx/common/base/log.h
+++ b/speechx/speechx/common/base/log.h
--- a/speechx/speechx/common/base/macros.h
+++ b/speechx/speechx/common/base/macros.h
--- a/speechx/speechx/common/base/thread_pool.h
+++ b/speechx/speechx/common/base/thread_pool.h
--- a/speechx/speechx/common/frontend/CMakeLists.txt
+++ b/speechx/speechx/common/frontend/CMakeLists.txt
--- a/speechx/speechx/common/frontend/audio/CMakeLists.txt
+++ b/speechx/speechx/common/frontend/audio/CMakeLists.txt
--- a/speechx/speechx/common/frontend/audio/assembler.cc
+++ b/speechx/speechx/common/frontend/audio/assembler.cc
--- a/speechx/speechx/common/frontend/audio/assembler.h
+++ b/speechx/speechx/common/frontend/audio/assembler.h
--- a/speechx/speechx/common/frontend/audio/audio_cache.cc
+++ b/speechx/speechx/common/frontend/audio/audio_cache.cc
--- a/speechx/speechx/common/frontend/audio/audio_cache.h
+++ b/speechx/speechx/common/frontend/audio/audio_cache.h
--- a/speechx/speechx/common/frontend/audio/cmvn.cc
+++ b/speechx/speechx/common/frontend/audio/cmvn.cc
--- a/speechx/speechx/common/frontend/audio/cmvn.h
+++ b/speechx/speechx/common/frontend/audio/cmvn.h
--- a/speechx/speechx/common/frontend/audio/cmvn_json2kaldi_main.cc
+++ b/speechx/speechx/common/frontend/audio/cmvn_json2kaldi_main.cc
--- a/speechx/speechx/common/frontend/audio/compute_fbank_main.cc
+++ b/speechx/speechx/common/frontend/audio/compute_fbank_main.cc
--- a/speechx/speechx/common/frontend/audio/compute_linear_spectrogram_main.cc
+++ b/speechx/speechx/common/frontend/audio/compute_linear_spectrogram_main.cc
--- a/speechx/speechx/common/frontend/audio/data_cache.h
+++ b/speechx/speechx/common/frontend/audio/data_cache.h
--- a/speechx/speechx/common/frontend/audio/db_norm.cc
+++ b/speechx/speechx/common/frontend/audio/db_norm.cc
--- a/speechx/speechx/common/frontend/audio/db_norm.h
+++ b/speechx/speechx/common/frontend/audio/db_norm.h
--- a/speechx/speechx/common/frontend/audio/fbank.cc
+++ b/speechx/speechx/common/frontend/audio/fbank.cc
--- a/speechx/speechx/common/frontend/audio/fbank.h
+++ b/speechx/speechx/common/frontend/audio/fbank.h
--- a/speechx/speechx/common/frontend/audio/feature_cache.cc
+++ b/speechx/speechx/common/frontend/audio/feature_cache.cc
--- a/speechx/speechx/common/frontend/audio/feature_cache.h
+++ b/speechx/speechx/common/frontend/audio/feature_cache.h
--- a/speechx/speechx/common/frontend/audio/feature_common.h
+++ b/speechx/speechx/common/frontend/audio/feature_common.h
--- a/speechx/speechx/common/frontend/audio/feature_common_inl.h
+++ b/speechx/speechx/common/frontend/audio/feature_common_inl.h
--- a/speechx/speechx/common/frontend/audio/feature_pipeline.cc
+++ b/speechx/speechx/common/frontend/audio/feature_pipeline.cc
--- a/speechx/speechx/common/frontend/audio/feature_pipeline.h
+++ b/speechx/speechx/common/frontend/audio/feature_pipeline.h
--- a/speechx/speechx/common/frontend/audio/frontend_itf.h
+++ b/speechx/speechx/common/frontend/audio/frontend_itf.h
--- a/speechx/speechx/common/frontend/audio/linear_spectrogram.cc
+++ b/speechx/speechx/common/frontend/audio/linear_spectrogram.cc
--- a/speechx/speechx/common/frontend/audio/linear_spectrogram.h
+++ b/speechx/speechx/common/frontend/audio/linear_spectrogram.h
--- a/speechx/speechx/common/frontend/audio/mfcc.cc
+++ b/speechx/speechx/common/frontend/audio/mfcc.cc
--- a/speechx/speechx/common/frontend/audio/mfcc.h
+++ b/speechx/speechx/common/frontend/audio/mfcc.h
--- a/speechx/speechx/common/frontend/audio/normalizer.h
+++ b/speechx/speechx/common/frontend/audio/normalizer.h
--- a/speechx/speechx/common/utils/CMakeLists.txt
+++ b/speechx/speechx/common/utils/CMakeLists.txt
--- a/speechx/speechx/common/utils/file_utils.cc
+++ b/speechx/speechx/common/utils/file_utils.cc
--- a/speechx/speechx/common/utils/file_utils.h
+++ b/speechx/speechx/common/utils/file_utils.h
--- a/speechx/speechx/common/utils/math.cc
+++ b/speechx/speechx/common/utils/math.cc
--- a/speechx/speechx/common/utils/math.h
+++ b/speechx/speechx/common/utils/math.h
--- a/speechx/speechx/decoder/ctc_decoders
+++ b/speechx/speechx/decoder/ctc_decoders
@ -1 +0,0 @@
 ../../../third_party/ctc_decoders
--- a/speechx/speechx/frontend/text/CMakeLists.txt
+++ b/speechx/speechx/frontend/text/CMakeLists.txt
--- a/speechx/speechx/kaldi/CMakeLists.txt
+++ b/speechx/speechx/kaldi/CMakeLists.txt
@ -1,4 +1,7 @@
 # Declares the kaldi sub-project and builds its base and util subdirectories.
 project(kaldi)
 # Expose this directory as an include root; the subdirectories added below
 # inherit it.
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}
 )
 add_subdirectory(base)
 add_subdirectory(util)
--- a/speechx/speechx/third_party/CMakeLists.txt
+++ b/speechx/speechx/third_party/CMakeLists.txt
--- a/speechx/speechx/third_party/README.md
+++ b/speechx/speechx/third_party/README.md
@ -1,4 +0,0 @@
 # third party
 These libs are copied and developed from third-party open-source software projects.
 For all of these things, the official websites are the best place to go.