From 869f4267d5fabbdf6f7b18515ebf33f28a755b6c Mon Sep 17 00:00:00 2001 From: YangZhou <56786796+SmileGoat@users.noreply.github.com> Date: Fri, 16 Dec 2022 12:17:03 +0800 Subject: [PATCH] [speechx]Speechx directory refactor (#2746) * refactor directory --- speechx/requirement.txt | 1 - speechx/speechx/CMakeLists.txt | 51 +- speechx/speechx/asr/CMakeLists.txt | 11 + .../speechx/{ => asr}/decoder/CMakeLists.txt | 0 speechx/speechx/{ => asr}/decoder/common.h | 0 .../decoder/ctc_beam_search_decoder.cc | 0 .../decoder/ctc_beam_search_decoder.h | 0 .../decoder/ctc_beam_search_decoder_main.cc | 0 .../{ => asr}/decoder/ctc_beam_search_opt.h | 0 .../asr/decoder/ctc_decoders/.gitignore | 9 + .../ctc_decoders/ctc_beam_search_decoder.cpp | 607 ++++++++++++++++++ .../ctc_decoders/ctc_beam_search_decoder.h | 175 +++++ .../ctc_decoders/ctc_greedy_decoder.cpp | 61 ++ .../decoder/ctc_decoders/ctc_greedy_decoder.h | 35 + .../decoder/ctc_decoders/decoder_utils.cpp | 193 ++++++ .../asr/decoder/ctc_decoders/decoder_utils.h | 111 ++++ .../asr/decoder/ctc_decoders/path_trie.cpp | 164 +++++ .../asr/decoder/ctc_decoders/path_trie.h | 82 +++ .../asr/decoder/ctc_decoders/scorer.cpp | 232 +++++++ .../speechx/asr/decoder/ctc_decoders/scorer.h | 114 ++++ .../decoder/ctc_prefix_beam_search_decoder.cc | 2 +- .../decoder/ctc_prefix_beam_search_decoder.h | 0 .../ctc_prefix_beam_search_decoder_main.cc | 0 .../decoder/ctc_prefix_beam_search_score.h | 0 .../{ => asr}/decoder/ctc_tlg_decoder.cc | 0 .../{ => asr}/decoder/ctc_tlg_decoder.h | 0 .../{ => asr}/decoder/ctc_tlg_decoder_main.cc | 0 .../speechx/{ => asr}/decoder/decoder_itf.h | 0 .../decoder/nnet_logprob_decoder_main.cc | 0 speechx/speechx/{ => asr}/decoder/param.h | 0 speechx/speechx/{ => asr}/nnet/CMakeLists.txt | 0 speechx/speechx/{ => asr}/nnet/decodable.cc | 0 speechx/speechx/{ => asr}/nnet/decodable.h | 0 speechx/speechx/{ => asr}/nnet/ds2_nnet.cc | 0 speechx/speechx/{ => asr}/nnet/ds2_nnet.h | 0 .../speechx/{ => asr}/nnet/ds2_nnet_main.cc | 0 speechx/speechx/{ => asr}/nnet/nnet_itf.h | 0 speechx/speechx/{ => asr}/nnet/u2_nnet.cc | 0 speechx/speechx/{ => asr}/nnet/u2_nnet.h | 0 .../speechx/{ => asr}/nnet/u2_nnet_main.cc | 0 .../{ => asr}/recognizer/CMakeLists.txt | 0 .../{ => asr}/recognizer/recognizer.cc | 0 .../speechx/{ => asr}/recognizer/recognizer.h | 0 .../{ => asr}/recognizer/recognizer_main.cc | 0 .../{ => asr}/recognizer/u2_recognizer.cc | 0 .../{ => asr}/recognizer/u2_recognizer.h | 0 .../recognizer/u2_recognizer_main.cc | 0 .../{protocol => asr/server}/CMakeLists.txt | 0 .../server}/websocket/CMakeLists.txt | 0 .../server}/websocket/websocket_client.cc | 0 .../server}/websocket/websocket_client.h | 0 .../websocket/websocket_client_main.cc | 0 .../server}/websocket/websocket_server.cc | 0 .../server}/websocket/websocket_server.h | 0 .../websocket/websocket_server_main.cc | 0 speechx/speechx/common/CMakeLists.txt | 16 + .../speechx/{ => common}/base/basic_types.h | 0 speechx/speechx/{ => common}/base/common.h | 0 speechx/speechx/{ => common}/base/flags.h | 0 speechx/speechx/{ => common}/base/log.h | 0 speechx/speechx/{ => common}/base/macros.h | 0 .../speechx/{ => common}/base/thread_pool.h | 0 .../{ => common}/frontend/CMakeLists.txt | 0 .../frontend/audio/CMakeLists.txt | 0 .../{ => common}/frontend/audio/assembler.cc | 0 .../{ => common}/frontend/audio/assembler.h | 0 .../frontend/audio/audio_cache.cc | 0 .../{ => common}/frontend/audio/audio_cache.h | 0 .../{ => common}/frontend/audio/cmvn.cc | 0 .../{ => common}/frontend/audio/cmvn.h | 0 .../frontend/audio/cmvn_json2kaldi_main.cc | 0 .../frontend/audio/compute_fbank_main.cc | 0 .../audio/compute_linear_spectrogram_main.cc | 0 .../{ => common}/frontend/audio/data_cache.h | 0 .../{ => common}/frontend/audio/db_norm.cc | 0 .../{ => common}/frontend/audio/db_norm.h | 0 .../{ => common}/frontend/audio/fbank.cc | 0 .../{ => common}/frontend/audio/fbank.h | 0 .../frontend/audio/feature_cache.cc | 0 .../frontend/audio/feature_cache.h | 0 .../frontend/audio/feature_common.h | 0 .../frontend/audio/feature_common_inl.h | 0 .../frontend/audio/feature_pipeline.cc | 0 .../frontend/audio/feature_pipeline.h | 0 .../frontend/audio/frontend_itf.h | 0 .../frontend/audio/linear_spectrogram.cc | 0 .../frontend/audio/linear_spectrogram.h | 0 .../{ => common}/frontend/audio/mfcc.cc | 0 .../{ => common}/frontend/audio/mfcc.h | 0 .../{ => common}/frontend/audio/normalizer.h | 0 .../speechx/{ => common}/utils/CMakeLists.txt | 0 .../speechx/{ => common}/utils/file_utils.cc | 0 .../speechx/{ => common}/utils/file_utils.h | 0 speechx/speechx/{ => common}/utils/math.cc | 0 speechx/speechx/{ => common}/utils/math.h | 0 speechx/speechx/decoder/ctc_decoders | 1 - speechx/speechx/frontend/text/CMakeLists.txt | 0 speechx/speechx/kaldi/CMakeLists.txt | 5 +- speechx/speechx/third_party/CMakeLists.txt | 0 speechx/speechx/third_party/README.md | 4 - 100 files changed, 1821 insertions(+), 53 deletions(-) delete mode 100644 speechx/requirement.txt create mode 100644 speechx/speechx/asr/CMakeLists.txt rename speechx/speechx/{ => asr}/decoder/CMakeLists.txt (100%) rename speechx/speechx/{ => asr}/decoder/common.h (100%) rename speechx/speechx/{ => asr}/decoder/ctc_beam_search_decoder.cc (100%) rename speechx/speechx/{ => asr}/decoder/ctc_beam_search_decoder.h (100%) rename speechx/speechx/{ => asr}/decoder/ctc_beam_search_decoder_main.cc (100%) rename speechx/speechx/{ => asr}/decoder/ctc_beam_search_opt.h (100%) create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/.gitignore create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/path_trie.h create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/scorer.h rename speechx/speechx/{ => asr}/decoder/ctc_prefix_beam_search_decoder.cc (99%) rename speechx/speechx/{ => asr}/decoder/ctc_prefix_beam_search_decoder.h (100%) rename speechx/speechx/{ => asr}/decoder/ctc_prefix_beam_search_decoder_main.cc (100%) rename speechx/speechx/{ => asr}/decoder/ctc_prefix_beam_search_score.h (100%) rename speechx/speechx/{ => asr}/decoder/ctc_tlg_decoder.cc (100%) rename speechx/speechx/{ => asr}/decoder/ctc_tlg_decoder.h (100%) rename speechx/speechx/{ => asr}/decoder/ctc_tlg_decoder_main.cc (100%) rename speechx/speechx/{ => asr}/decoder/decoder_itf.h (100%) rename speechx/speechx/{ => asr}/decoder/nnet_logprob_decoder_main.cc (100%) rename speechx/speechx/{ => asr}/decoder/param.h (100%) rename speechx/speechx/{ => asr}/nnet/CMakeLists.txt (100%) rename speechx/speechx/{ => asr}/nnet/decodable.cc (100%) rename speechx/speechx/{ => asr}/nnet/decodable.h (100%) rename speechx/speechx/{ => asr}/nnet/ds2_nnet.cc (100%) rename speechx/speechx/{ => asr}/nnet/ds2_nnet.h (100%) rename speechx/speechx/{ => asr}/nnet/ds2_nnet_main.cc (100%) rename speechx/speechx/{ => asr}/nnet/nnet_itf.h (100%) rename speechx/speechx/{ => asr}/nnet/u2_nnet.cc (100%) rename speechx/speechx/{ => asr}/nnet/u2_nnet.h (100%) rename speechx/speechx/{ => asr}/nnet/u2_nnet_main.cc (100%) rename speechx/speechx/{ => asr}/recognizer/CMakeLists.txt (100%) rename speechx/speechx/{ => asr}/recognizer/recognizer.cc (100%) rename speechx/speechx/{ => asr}/recognizer/recognizer.h (100%) rename speechx/speechx/{ => asr}/recognizer/recognizer_main.cc (100%) rename speechx/speechx/{ => asr}/recognizer/u2_recognizer.cc (100%) rename speechx/speechx/{ => asr}/recognizer/u2_recognizer.h (100%) rename speechx/speechx/{ => asr}/recognizer/u2_recognizer_main.cc (100%) rename speechx/speechx/{protocol => asr/server}/CMakeLists.txt (100%) rename speechx/speechx/{protocol => asr/server}/websocket/CMakeLists.txt (100%) rename speechx/speechx/{protocol => asr/server}/websocket/websocket_client.cc (100%) rename speechx/speechx/{protocol => asr/server}/websocket/websocket_client.h (100%) rename speechx/speechx/{protocol => asr/server}/websocket/websocket_client_main.cc (100%) rename speechx/speechx/{protocol => asr/server}/websocket/websocket_server.cc (100%) rename speechx/speechx/{protocol => asr/server}/websocket/websocket_server.h (100%) rename speechx/speechx/{protocol => asr/server}/websocket/websocket_server_main.cc (100%) create mode 100644 speechx/speechx/common/CMakeLists.txt rename speechx/speechx/{ => common}/base/basic_types.h (100%) rename speechx/speechx/{ => common}/base/common.h (100%) rename speechx/speechx/{ => common}/base/flags.h (100%) rename speechx/speechx/{ => common}/base/log.h (100%) rename speechx/speechx/{ => common}/base/macros.h (100%) rename speechx/speechx/{ => common}/base/thread_pool.h (100%) rename speechx/speechx/{ => common}/frontend/CMakeLists.txt (100%) rename speechx/speechx/{ => common}/frontend/audio/CMakeLists.txt (100%) rename speechx/speechx/{ => common}/frontend/audio/assembler.cc (100%) rename speechx/speechx/{ => common}/frontend/audio/assembler.h (100%) rename speechx/speechx/{ => common}/frontend/audio/audio_cache.cc (100%) rename speechx/speechx/{ => common}/frontend/audio/audio_cache.h (100%) rename speechx/speechx/{ => common}/frontend/audio/cmvn.cc (100%) rename speechx/speechx/{ => common}/frontend/audio/cmvn.h (100%) rename speechx/speechx/{ => common}/frontend/audio/cmvn_json2kaldi_main.cc (100%) rename speechx/speechx/{ => common}/frontend/audio/compute_fbank_main.cc (100%) rename speechx/speechx/{ => common}/frontend/audio/compute_linear_spectrogram_main.cc (100%) rename speechx/speechx/{ => common}/frontend/audio/data_cache.h (100%) rename speechx/speechx/{ => common}/frontend/audio/db_norm.cc (100%) rename speechx/speechx/{ => common}/frontend/audio/db_norm.h (100%) rename speechx/speechx/{ => common}/frontend/audio/fbank.cc (100%) rename speechx/speechx/{ => common}/frontend/audio/fbank.h (100%) rename speechx/speechx/{ => common}/frontend/audio/feature_cache.cc (100%) rename speechx/speechx/{ => common}/frontend/audio/feature_cache.h (100%) rename speechx/speechx/{ => common}/frontend/audio/feature_common.h (100%) rename speechx/speechx/{ => common}/frontend/audio/feature_common_inl.h (100%) rename speechx/speechx/{ => common}/frontend/audio/feature_pipeline.cc (100%) rename speechx/speechx/{ => common}/frontend/audio/feature_pipeline.h (100%) rename speechx/speechx/{ => common}/frontend/audio/frontend_itf.h (100%) rename speechx/speechx/{ => common}/frontend/audio/linear_spectrogram.cc (100%) rename speechx/speechx/{ => common}/frontend/audio/linear_spectrogram.h (100%) rename speechx/speechx/{ => common}/frontend/audio/mfcc.cc (100%) rename speechx/speechx/{ => common}/frontend/audio/mfcc.h (100%) rename speechx/speechx/{ => common}/frontend/audio/normalizer.h (100%) rename speechx/speechx/{ => common}/utils/CMakeLists.txt (100%) rename speechx/speechx/{ => common}/utils/file_utils.cc (100%) rename speechx/speechx/{ => common}/utils/file_utils.h (100%) rename speechx/speechx/{ => common}/utils/math.cc (100%) rename speechx/speechx/{ => common}/utils/math.h (100%) delete mode 120000 speechx/speechx/decoder/ctc_decoders delete mode 100644 speechx/speechx/frontend/text/CMakeLists.txt delete mode 100644 speechx/speechx/third_party/CMakeLists.txt delete mode 100644 speechx/speechx/third_party/README.md diff --git a/speechx/requirement.txt b/speechx/requirement.txt deleted file mode 100644 index 6a6db096..00000000 --- a/speechx/requirement.txt +++ /dev/null @@ -1 +0,0 @@ -paddlepaddle>=2.4rc diff --git a/speechx/speechx/CMakeLists.txt b/speechx/speechx/CMakeLists.txt index 60c18347..b522e158 100644 --- a/speechx/speechx/CMakeLists.txt +++ b/speechx/speechx/CMakeLists.txt @@ -2,50 +2,11 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) project(speechx LANGUAGES CXX) -include_directories( -${CMAKE_CURRENT_SOURCE_DIR} -${CMAKE_CURRENT_SOURCE_DIR}/kaldi -) -add_subdirectory(kaldi) - -include_directories( -${CMAKE_CURRENT_SOURCE_DIR} -${CMAKE_CURRENT_SOURCE_DIR}/utils -) -add_subdirectory(utils) - -include_directories( -${CMAKE_CURRENT_SOURCE_DIR} -${CMAKE_CURRENT_SOURCE_DIR}/frontend -) -add_subdirectory(frontend) - -include_directories( -${CMAKE_CURRENT_SOURCE_DIR} -${CMAKE_CURRENT_SOURCE_DIR}/nnet -) -add_subdirectory(nnet) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kaldi) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common) -include_directories( -${CMAKE_CURRENT_SOURCE_DIR} -${CMAKE_CURRENT_SOURCE_DIR}/decoder -) -add_subdirectory(decoder) - -include_directories( -${CMAKE_CURRENT_SOURCE_DIR} -${CMAKE_CURRENT_SOURCE_DIR}/recognizer -) -add_subdirectory(recognizer) - -include_directories( -${CMAKE_CURRENT_SOURCE_DIR} -${CMAKE_CURRENT_SOURCE_DIR}/protocol -) -add_subdirectory(protocol) - -include_directories( -${CMAKE_CURRENT_SOURCE_DIR} -${CMAKE_CURRENT_SOURCE_DIR}/codelab -) +add_subdirectory(asr) +add_subdirectory(common) +add_subdirectory(kaldi) add_subdirectory(codelab) diff --git a/speechx/speechx/asr/CMakeLists.txt b/speechx/speechx/asr/CMakeLists.txt new file mode 100644 index 00000000..ff4cdecb --- /dev/null +++ b/speechx/speechx/asr/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.14 FATAL_ERROR) + +project(ASR LANGUAGES CXX) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/server) + +add_subdirectory(decoder) +add_subdirectory(recognizer) +add_subdirectory(nnet) +add_subdirectory(server) diff --git a/speechx/speechx/decoder/CMakeLists.txt b/speechx/speechx/asr/decoder/CMakeLists.txt similarity index 100% rename from speechx/speechx/decoder/CMakeLists.txt rename to speechx/speechx/asr/decoder/CMakeLists.txt diff --git a/speechx/speechx/decoder/common.h b/speechx/speechx/asr/decoder/common.h similarity index 100% rename from speechx/speechx/decoder/common.h rename to speechx/speechx/asr/decoder/common.h diff --git a/speechx/speechx/decoder/ctc_beam_search_decoder.cc b/speechx/speechx/asr/decoder/ctc_beam_search_decoder.cc similarity index 100% rename from speechx/speechx/decoder/ctc_beam_search_decoder.cc rename to speechx/speechx/asr/decoder/ctc_beam_search_decoder.cc diff --git a/speechx/speechx/decoder/ctc_beam_search_decoder.h b/speechx/speechx/asr/decoder/ctc_beam_search_decoder.h similarity index 100% rename from speechx/speechx/decoder/ctc_beam_search_decoder.h rename to speechx/speechx/asr/decoder/ctc_beam_search_decoder.h diff --git a/speechx/speechx/decoder/ctc_beam_search_decoder_main.cc b/speechx/speechx/asr/decoder/ctc_beam_search_decoder_main.cc similarity index 100% rename from speechx/speechx/decoder/ctc_beam_search_decoder_main.cc rename to speechx/speechx/asr/decoder/ctc_beam_search_decoder_main.cc diff --git a/speechx/speechx/decoder/ctc_beam_search_opt.h b/speechx/speechx/asr/decoder/ctc_beam_search_opt.h similarity index 100% rename from speechx/speechx/decoder/ctc_beam_search_opt.h rename to speechx/speechx/asr/decoder/ctc_beam_search_opt.h diff --git a/speechx/speechx/asr/decoder/ctc_decoders/.gitignore b/speechx/speechx/asr/decoder/ctc_decoders/.gitignore new file mode 100644 index 00000000..0b1046ae --- /dev/null +++ b/speechx/speechx/asr/decoder/ctc_decoders/.gitignore @@ -0,0 +1,9 @@ +ThreadPool/ +build/ +dist/ +kenlm/ +openfst-1.6.3/ +openfst-1.6.3.tar.gz +swig_decoders.egg-info/ +decoders_wrap.cxx +swig_decoders.py diff --git a/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp new file mode 100644 index 00000000..ebea5c22 --- /dev/null +++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp @@ -0,0 +1,607 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ctc_beam_search_decoder.h" + +#include +#include +#include +#include +#include +#include + +#include "ThreadPool.h" +#include "fst/fstlib.h" + +#include "decoder_utils.h" +#include "path_trie.h" + +using FSTMATCH = fst::SortedMatcher; + + +std::vector> ctc_beam_search_decoding( + const std::vector> &probs_seq, + const std::vector &vocabulary, + size_t beam_size, + double cutoff_prob, + size_t cutoff_top_n, + Scorer *ext_scorer, + size_t blank_id) { + // dimension check + size_t num_time_steps = probs_seq.size(); + for (size_t i = 0; i < num_time_steps; ++i) { + VALID_CHECK_EQ(probs_seq[i].size(), + // vocabulary.size() + 1, + vocabulary.size(), + "The shape of probs_seq does not match with " + "the shape of the vocabulary"); + } + + + // assign space id + auto it = std::find(vocabulary.begin(), vocabulary.end(), kSPACE); + int space_id = it - vocabulary.begin(); + // if no space in vocabulary + if ((size_t)space_id >= vocabulary.size()) { + space_id = -2; + } + // init prefixes' root + PathTrie root; + root.score = root.log_prob_b_prev = 0.0; + std::vector prefixes; + prefixes.push_back(&root); + + if (ext_scorer != nullptr && !ext_scorer->is_character_based()) { + auto fst_dict = + static_cast(ext_scorer->dictionary); + fst::StdVectorFst *dict_ptr = fst_dict->Copy(true); + root.set_dictionary(dict_ptr); + auto matcher = std::make_shared(*dict_ptr, fst::MATCH_INPUT); + root.set_matcher(matcher); + } + + // prefix search over time + for (size_t time_step = 0; time_step < num_time_steps; ++time_step) { + auto &prob = probs_seq[time_step]; + + float min_cutoff = -NUM_FLT_INF; + bool full_beam = false; + if (ext_scorer != nullptr) { + size_t num_prefixes = std::min(prefixes.size(), beam_size); + std::sort(prefixes.begin(), + prefixes.begin() + num_prefixes, + prefix_compare); + min_cutoff = prefixes[num_prefixes - 1]->score + + std::log(prob[blank_id]) - + std::max(0.0, ext_scorer->beta); + full_beam = (num_prefixes == beam_size); + } + + std::vector> log_prob_idx = + get_pruned_log_probs(prob, cutoff_prob, cutoff_top_n); + // loop over chars + for (size_t index = 0; index < log_prob_idx.size(); index++) { + auto c = log_prob_idx[index].first; + auto log_prob_c = log_prob_idx[index].second; + + for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) { + auto prefix = prefixes[i]; + if (full_beam && log_prob_c + prefix->score < min_cutoff) { + break; + } + // blank + if (c == blank_id) { + prefix->log_prob_b_cur = log_sum_exp( + prefix->log_prob_b_cur, log_prob_c + prefix->score); + continue; + } + // repeated character + if (c == prefix->character) { + prefix->log_prob_nb_cur = + log_sum_exp(prefix->log_prob_nb_cur, + log_prob_c + prefix->log_prob_nb_prev); + } + // get new prefix + auto prefix_new = prefix->get_path_trie(c); + + if (prefix_new != nullptr) { + float log_p = -NUM_FLT_INF; + + if (c == prefix->character && + prefix->log_prob_b_prev > -NUM_FLT_INF) { + log_p = log_prob_c + prefix->log_prob_b_prev; + } else if (c != prefix->character) { + log_p = log_prob_c + prefix->score; + } + + // language model scoring + if (ext_scorer != nullptr && + (c == space_id || ext_scorer->is_character_based())) { + PathTrie *prefix_to_score = nullptr; + // skip scoring the space + if (ext_scorer->is_character_based()) { + prefix_to_score = prefix_new; + } else { + prefix_to_score = prefix; + } + + float score = 0.0; + std::vector ngram; + ngram = ext_scorer->make_ngram(prefix_to_score); + score = ext_scorer->get_log_cond_prob(ngram) * + ext_scorer->alpha; + log_p += score; + log_p += ext_scorer->beta; + } + prefix_new->log_prob_nb_cur = + log_sum_exp(prefix_new->log_prob_nb_cur, log_p); + } + } // end of loop over prefix + } // end of loop over vocabulary + + + prefixes.clear(); + // update log probs + root.iterate_to_vec(prefixes); + + // only preserve top beam_size prefixes + if (prefixes.size() >= beam_size) { + std::nth_element(prefixes.begin(), + prefixes.begin() + beam_size, + prefixes.end(), + prefix_compare); + for (size_t i = beam_size; i < prefixes.size(); ++i) { + prefixes[i]->remove(); + } + } + } // end of loop over time + + // score the last word of each prefix that doesn't end with space + if (ext_scorer != nullptr && !ext_scorer->is_character_based()) { + for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { + auto prefix = prefixes[i]; + if (!prefix->is_empty() && prefix->character != space_id) { + float score = 0.0; + std::vector ngram = ext_scorer->make_ngram(prefix); + score = + ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha; + score += ext_scorer->beta; + prefix->score += score; + } + } + } + + size_t num_prefixes = std::min(prefixes.size(), beam_size); + std::sort( + prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare); + + // compute approximate ctc score as the return score, without affecting the + // return order of decoding result. To delete when decoder gets stable. + for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { + double approx_ctc = prefixes[i]->score; + if (ext_scorer != nullptr) { + std::vector output; + prefixes[i]->get_path_vec(output); + auto prefix_length = output.size(); + auto words = ext_scorer->split_labels(output); + // remove word insert + approx_ctc = approx_ctc - prefix_length * ext_scorer->beta; + // remove language model weight: + approx_ctc -= + (ext_scorer->get_sent_log_prob(words)) * ext_scorer->alpha; + } + prefixes[i]->approx_ctc = approx_ctc; + } + + return get_beam_search_result(prefixes, vocabulary, beam_size); +} + + +std::vector>> +ctc_beam_search_decoding_batch( + const std::vector>> &probs_split, + const std::vector &vocabulary, + size_t beam_size, + size_t num_processes, + double cutoff_prob, + size_t cutoff_top_n, + Scorer *ext_scorer, + size_t blank_id) { + VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!"); + // thread pool + ThreadPool pool(num_processes); + // number of samples + size_t batch_size = probs_split.size(); + + // enqueue the tasks of decoding + std::vector>>> res; + for (size_t i = 0; i < batch_size; ++i) { + res.emplace_back(pool.enqueue(ctc_beam_search_decoding, + probs_split[i], + vocabulary, + beam_size, + cutoff_prob, + cutoff_top_n, + ext_scorer, + blank_id)); + } + + // get decoding results + std::vector>> batch_results; + for (size_t i = 0; i < batch_size; ++i) { + batch_results.emplace_back(res[i].get()); + } + return batch_results; +} + +void ctc_beam_search_decode_chunk_begin(PathTrie *root, Scorer *ext_scorer) { + if (ext_scorer != nullptr && !ext_scorer->is_character_based()) { + auto fst_dict = + static_cast(ext_scorer->dictionary); + fst::StdVectorFst *dict_ptr = fst_dict->Copy(true); + root->set_dictionary(dict_ptr); + auto matcher = std::make_shared(*dict_ptr, fst::MATCH_INPUT); + root->set_matcher(matcher); + } +} + +void ctc_beam_search_decode_chunk( + PathTrie *root, + std::vector &prefixes, + const std::vector> &probs_seq, + const std::vector &vocabulary, + size_t beam_size, + double cutoff_prob, + size_t cutoff_top_n, + Scorer *ext_scorer, + size_t blank_id) { + // dimension check + size_t num_time_steps = probs_seq.size(); + for (size_t i = 0; i < num_time_steps; ++i) { + VALID_CHECK_EQ(probs_seq[i].size(), + // vocabulary.size() + 1, + vocabulary.size(), + "The shape of probs_seq does not match with " + "the shape of the vocabulary"); + } + + // assign space id + auto it = std::find(vocabulary.begin(), vocabulary.end(), kSPACE); + int space_id = it - vocabulary.begin(); + // if no space in vocabulary + if ((size_t)space_id >= vocabulary.size()) { + space_id = -2; + } + // init prefixes' root + // + // prefix search over time + for (size_t time_step = 0; time_step < num_time_steps; ++time_step) { + auto &prob = probs_seq[time_step]; + + float min_cutoff = -NUM_FLT_INF; + bool full_beam = false; + if (ext_scorer != nullptr) { + size_t num_prefixes = std::min(prefixes.size(), beam_size); + std::sort(prefixes.begin(), + prefixes.begin() + num_prefixes, + prefix_compare); + min_cutoff = prefixes[num_prefixes - 1]->score + + std::log(prob[blank_id]) - + std::max(0.0, ext_scorer->beta); + full_beam = (num_prefixes == beam_size); + } + + std::vector> log_prob_idx = + get_pruned_log_probs(prob, cutoff_prob, cutoff_top_n); + // loop over chars + for (size_t index = 0; index < log_prob_idx.size(); index++) { + auto c = log_prob_idx[index].first; + auto log_prob_c = log_prob_idx[index].second; + + for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) { + auto prefix = prefixes[i]; + if (full_beam && log_prob_c + prefix->score < min_cutoff) { + break; + } + // blank + if (c == blank_id) { + prefix->log_prob_b_cur = log_sum_exp( + prefix->log_prob_b_cur, log_prob_c + prefix->score); + continue; + } + // repeated character + if (c == prefix->character) { + prefix->log_prob_nb_cur = + log_sum_exp(prefix->log_prob_nb_cur, + log_prob_c + prefix->log_prob_nb_prev); + } + // get new prefix + auto prefix_new = prefix->get_path_trie(c); + + if (prefix_new != nullptr) { + float log_p = -NUM_FLT_INF; + + if (c == prefix->character && + prefix->log_prob_b_prev > -NUM_FLT_INF) { + log_p = log_prob_c + prefix->log_prob_b_prev; + } else if (c != prefix->character) { + log_p = log_prob_c + prefix->score; + } + + // language model scoring + if (ext_scorer != nullptr && + (c == space_id || ext_scorer->is_character_based())) { + PathTrie *prefix_to_score = nullptr; + // skip scoring the space + if (ext_scorer->is_character_based()) { + prefix_to_score = prefix_new; + } else { + prefix_to_score = prefix; + } + + float score = 0.0; + std::vector ngram; + ngram = ext_scorer->make_ngram(prefix_to_score); + score = ext_scorer->get_log_cond_prob(ngram) * + ext_scorer->alpha; + log_p += score; + log_p += ext_scorer->beta; + } + prefix_new->log_prob_nb_cur = + log_sum_exp(prefix_new->log_prob_nb_cur, log_p); + } + } // end of loop over prefix + } // end of loop over vocabulary + + prefixes.clear(); + // update log probs + + root->iterate_to_vec(prefixes); + + // only preserve top beam_size prefixes + if (prefixes.size() >= beam_size) { + std::nth_element(prefixes.begin(), + prefixes.begin() + beam_size, + prefixes.end(), + prefix_compare); + for (size_t i = beam_size; i < prefixes.size(); ++i) { + prefixes[i]->remove(); + } + } + } // end of loop over time + + return; +} + + +std::vector> get_decode_result( + std::vector &prefixes, + const std::vector &vocabulary, + size_t beam_size, + Scorer *ext_scorer) { + auto it = std::find(vocabulary.begin(), vocabulary.end(), kSPACE); + int space_id = it - vocabulary.begin(); + // if no space in vocabulary + if ((size_t)space_id >= vocabulary.size()) { + space_id = -2; + } + // score the last word of each prefix that doesn't end with space + if (ext_scorer != nullptr && !ext_scorer->is_character_based()) { + for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { + auto prefix = prefixes[i]; + if (!prefix->is_empty() && prefix->character != space_id) { + float score = 0.0; + std::vector ngram = ext_scorer->make_ngram(prefix); + score = + ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha; + score += ext_scorer->beta; + prefix->score += score; + } + } + } + + size_t num_prefixes = std::min(prefixes.size(), beam_size); + std::sort( + prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare); + + // compute aproximate ctc score as the return score, without affecting the + // return order of decoding result. To delete when decoder gets stable. + for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { + double approx_ctc = prefixes[i]->score; + if (ext_scorer != nullptr) { + std::vector output; + prefixes[i]->get_path_vec(output); + auto prefix_length = output.size(); + auto words = ext_scorer->split_labels(output); + // remove word insert + approx_ctc = approx_ctc - prefix_length * ext_scorer->beta; + // remove language model weight: + approx_ctc -= + (ext_scorer->get_sent_log_prob(words)) * ext_scorer->alpha; + } + prefixes[i]->approx_ctc = approx_ctc; + } + + std::vector> res = + get_beam_search_result(prefixes, vocabulary, beam_size); + + // pay back the last word of each prefix that doesn't end with space (for + // decoding by chunk) + if (ext_scorer != nullptr && !ext_scorer->is_character_based()) { + for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { + auto prefix = prefixes[i]; + if (!prefix->is_empty() && prefix->character != space_id) { + float score = 0.0; + std::vector ngram = ext_scorer->make_ngram(prefix); + score = + ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha; + score += ext_scorer->beta; + prefix->score -= score; + } + } + } + return res; +} + + +void free_storage(std::unique_ptr &storage) { + storage = nullptr; +} + + +CtcBeamSearchDecoderBatch::~CtcBeamSearchDecoderBatch() {} + +CtcBeamSearchDecoderBatch::CtcBeamSearchDecoderBatch( + const std::vector &vocabulary, + size_t batch_size, + size_t beam_size, + size_t num_processes, + double cutoff_prob, + size_t cutoff_top_n, + Scorer *ext_scorer, + size_t blank_id) + : batch_size(batch_size), + beam_size(beam_size), + num_processes(num_processes), + cutoff_prob(cutoff_prob), + cutoff_top_n(cutoff_top_n), + ext_scorer(ext_scorer), + blank_id(blank_id) { + VALID_CHECK_GT(this->beam_size, 0, "beam_size must be greater than 0!"); + VALID_CHECK_GT( + this->num_processes, 0, "num_processes must be nonnegative!"); + this->vocabulary = vocabulary; + for (size_t i = 0; i < batch_size; i++) { + this->decoder_storage_vector.push_back( + std::unique_ptr( + new CtcBeamSearchDecoderStorage())); + ctc_beam_search_decode_chunk_begin( + this->decoder_storage_vector[i]->root, ext_scorer); + } +}; + +/** + * Input + * probs_split: shape [B, T, D] + */ +void CtcBeamSearchDecoderBatch::next( + const std::vector>> &probs_split, + const std::vector &has_value) { + VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!"); + // thread pool + size_t num_has_value = 0; + for (int i = 0; i < has_value.size(); i++) + if (has_value[i] == "true") num_has_value += 1; + ThreadPool pool(std::min(num_processes, num_has_value)); + // number of samples + size_t probs_num = probs_split.size(); + VALID_CHECK_EQ(this->batch_size, + probs_num, + "The batch size of the current input data should be same " + "with the input data before"); + + // enqueue the tasks of decoding + std::vector> res; + for (size_t i = 0; i < batch_size; ++i) { + if (has_value[i] == "true") { + res.emplace_back(pool.enqueue( + ctc_beam_search_decode_chunk, + std::ref(this->decoder_storage_vector[i]->root), + std::ref(this->decoder_storage_vector[i]->prefixes), + probs_split[i], + this->vocabulary, + this->beam_size, + this->cutoff_prob, + this->cutoff_top_n, + this->ext_scorer, + this->blank_id)); + } + } + + for (size_t i = 0; i < batch_size; ++i) { + res[i].get(); + } + return; +}; + +/** + * Return + * batch_result: shape[B, beam_size,(-approx_ctc score, string)] + */ +std::vector>> +CtcBeamSearchDecoderBatch::decode() { + VALID_CHECK_GT( + this->num_processes, 0, "num_processes must be nonnegative!"); + // thread pool + ThreadPool pool(this->num_processes); + // number of samples + // enqueue the tasks of decoding + std::vector>>> res; + for (size_t i = 0; i < this->batch_size; ++i) { + res.emplace_back( + pool.enqueue(get_decode_result, + std::ref(this->decoder_storage_vector[i]->prefixes), + this->vocabulary, + this->beam_size, + this->ext_scorer)); + } + // get decoding results + std::vector>> batch_results; + for (size_t i = 0; i < this->batch_size; ++i) { + batch_results.emplace_back(res[i].get()); + } + return batch_results; +} + + +/** + * reset the state of ctcBeamSearchDecoderBatch + */ +void CtcBeamSearchDecoderBatch::reset_state(size_t batch_size, + size_t beam_size, + size_t num_processes, + double cutoff_prob, + size_t cutoff_top_n) { + this->batch_size = batch_size; + this->beam_size = beam_size; + this->num_processes = num_processes; + this->cutoff_prob = cutoff_prob; + this->cutoff_top_n = cutoff_top_n; + + VALID_CHECK_GT(this->beam_size, 0, "beam_size must be greater than 0!"); + VALID_CHECK_GT( + this->num_processes, 0, "num_processes must be nonnegative!"); + // thread pool + ThreadPool pool(this->num_processes); + // number of samples + // enqueue the tasks of decoding + std::vector> res; + size_t storage_size = decoder_storage_vector.size(); + for (size_t i = 0; i < storage_size; i++) { + res.emplace_back(pool.enqueue( + free_storage, std::ref(this->decoder_storage_vector[i]))); + } + for (size_t i = 0; i < storage_size; ++i) { + res[i].get(); + } + std::vector>().swap( + decoder_storage_vector); + for (size_t i = 0; i < this->batch_size; i++) { + this->decoder_storage_vector.push_back( + std::unique_ptr( + new CtcBeamSearchDecoderStorage())); + ctc_beam_search_decode_chunk_begin( + this->decoder_storage_vector[i]->root, this->ext_scorer); + } +} \ No newline at end of file diff --git a/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h new file mode 100644 index 00000000..92d2b855 --- /dev/null +++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h @@ -0,0 +1,175 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CTC_BEAM_SEARCH_DECODER_H_ +#define CTC_BEAM_SEARCH_DECODER_H_ + +#include +#include +#include + +#include "scorer.h" + +/* CTC Beam Search Decoder + + * Parameters: + * probs_seq: 2-D vector that each element is a vector of probabilities + * over vocabulary of one time step. + * vocabulary: A vector of vocabulary. + * beam_size: The width of beam search. + * cutoff_prob: Cutoff probability for pruning. + * cutoff_top_n: Cutoff number for pruning. + * ext_scorer: External scorer to evaluate a prefix, which consists of + * n-gram language model scoring and word insertion term. + * Default null, decoding the input sample without scorer. + * Return: + * A vector that each element is a pair of score and decoding result, + * in desending order. +*/ +std::vector> ctc_beam_search_decoding( + const std::vector> &probs_seq, + const std::vector &vocabulary, + size_t beam_size, + double cutoff_prob = 1.0, + size_t cutoff_top_n = 40, + Scorer *ext_scorer = nullptr, + size_t blank_id = 0); + + +/* CTC Beam Search Decoder for batch data + + * Parameters: + * probs_seq: 3-D vector that each element is a 2-D vector that can be used + * by ctc_beam_search_decoder(). + * vocabulary: A vector of vocabulary. + * beam_size: The width of beam search. + * num_processes: Number of threads for beam search. + * cutoff_prob: Cutoff probability for pruning. + * cutoff_top_n: Cutoff number for pruning. + * ext_scorer: External scorer to evaluate a prefix, which consists of + * n-gram language model scoring and word insertion term. + * Default null, decoding the input sample without scorer. + * Return: + * A 2-D vector that each element is a vector of beam search decoding + * result for one audio sample. +*/ +std::vector>> +ctc_beam_search_decoding_batch( + const std::vector>> &probs_split, + const std::vector &vocabulary, + size_t beam_size, + size_t num_processes, + double cutoff_prob = 1.0, + size_t cutoff_top_n = 40, + Scorer *ext_scorer = nullptr, + size_t blank_id = 0); + +/** + * Store the root and prefixes for decoder + */ + +class CtcBeamSearchDecoderStorage { + public: + PathTrie *root = nullptr; + std::vector prefixes; + + CtcBeamSearchDecoderStorage() { + // init prefixes' root + this->root = new PathTrie(); + this->root->log_prob_b_prev = 0.0; + // The score of root is in log scale.Since the prob=1.0, the prob score + // in log scale is 0.0 + this->root->score = root->log_prob_b_prev; + // std::vector prefixes; + this->prefixes.push_back(root); + }; + + ~CtcBeamSearchDecoderStorage() { + if (root != nullptr) { + delete root; + root = nullptr; + } + }; +}; + +/** + * The ctc beam search decoder, support batchsize >= 1 + */ +class CtcBeamSearchDecoderBatch { + public: + CtcBeamSearchDecoderBatch(const std::vector &vocabulary, + size_t batch_size, + size_t beam_size, + size_t num_processes, + double cutoff_prob, + size_t cutoff_top_n, + Scorer *ext_scorer, + size_t blank_id); + + ~CtcBeamSearchDecoderBatch(); + void next(const std::vector>> &probs_split, + const std::vector &has_value); + + std::vector>> decode(); + + void reset_state(size_t batch_size, + size_t beam_size, + size_t num_processes, + double cutoff_prob, + size_t cutoff_top_n); + + private: + std::vector vocabulary; + size_t batch_size; + size_t beam_size; + size_t num_processes; + double cutoff_prob; + size_t cutoff_top_n; + Scorer *ext_scorer; + size_t blank_id; + std::vector> + decoder_storage_vector; +}; + +/** + * function for chunk decoding + */ +void ctc_beam_search_decode_chunk( + PathTrie *root, + std::vector &prefixes, + const std::vector> &probs_seq, + const std::vector &vocabulary, + size_t beam_size, + double cutoff_prob, + size_t cutoff_top_n, + Scorer *ext_scorer, + size_t blank_id); + +std::vector> get_decode_result( + std::vector &prefixes, + const std::vector &vocabulary, + size_t beam_size, + Scorer *ext_scorer); + +/** + * free the CtcBeamSearchDecoderStorage + */ +void free_storage(std::unique_ptr &storage); + +/** + * initialize the root + */ +void ctc_beam_search_decode_chunk_begin(PathTrie *root, Scorer *ext_scorer); + +#endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp new file mode 100644 index 00000000..6aa3c996 --- /dev/null +++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp @@ -0,0 +1,61 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ctc_greedy_decoder.h" +#include "decoder_utils.h" + +std::string ctc_greedy_decoding( + const std::vector> &probs_seq, + const std::vector &vocabulary, + size_t blank_id) { + // dimension check + size_t num_time_steps = probs_seq.size(); + for (size_t i = 0; i < num_time_steps; ++i) { + VALID_CHECK_EQ(probs_seq[i].size(), + vocabulary.size(), + "The shape of probs_seq does not match with " + "the shape of the vocabulary"); + } + + // size_t blank_id = vocabulary.size(); + + std::vector max_idx_vec(num_time_steps, 0); + std::vector idx_vec; + for (size_t i = 0; i < num_time_steps; ++i) { + double max_prob = 0.0; + size_t max_idx = 0; + const std::vector &probs_step = probs_seq[i]; + for (size_t j = 0; j < probs_step.size(); ++j) { + if (max_prob < probs_step[j]) { + max_idx = j; + max_prob = probs_step[j]; + } + } + // id with maximum probability in current time step + max_idx_vec[i] = max_idx; + // deduplicate + if ((i == 0) || ((i > 0) && max_idx_vec[i] != max_idx_vec[i - 1])) { + idx_vec.push_back(max_idx_vec[i]); + } + } + + std::string best_path_result; + for (size_t i = 0; i < idx_vec.size(); ++i) { + if (idx_vec[i] != blank_id) { + std::string ch = vocabulary[idx_vec[i]]; + best_path_result += (ch == kSPACE) ? tSPACE : ch; + } + } + return best_path_result; +} diff --git a/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h new file mode 100644 index 00000000..4451600d --- /dev/null +++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h @@ -0,0 +1,35 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CTC_GREEDY_DECODER_H +#define CTC_GREEDY_DECODER_H + +#include +#include + +/* CTC Greedy (Best Path) Decoder + * + * Parameters: + * probs_seq: 2-D vector that each element is a vector of probabilities + * over vocabulary of one time step. + * vocabulary: A vector of vocabulary. + * Return: + * The decoding result in string + */ +std::string ctc_greedy_decoding( + const std::vector>& probs_seq, + const std::vector& vocabulary, + size_t blank_id); + +#endif // CTC_GREEDY_DECODER_H diff --git a/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp new file mode 100644 index 00000000..c7ef6542 --- /dev/null +++ b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp @@ -0,0 +1,193 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "decoder_utils.h" + +#include +#include +#include + +std::vector> get_pruned_log_probs( + const std::vector &prob_step, + double cutoff_prob, + size_t cutoff_top_n) { + std::vector> prob_idx; + for (size_t i = 0; i < prob_step.size(); ++i) { + prob_idx.push_back(std::pair(i, prob_step[i])); + } + // pruning of vocabulary + size_t cutoff_len = prob_step.size(); + if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) { + std::sort(prob_idx.begin(), + prob_idx.end(), + pair_comp_second_rev); + if (cutoff_prob < 1.0) { + double cum_prob = 0.0; + cutoff_len = 0; + for (size_t i = 0; i < prob_idx.size(); ++i) { + cum_prob += prob_idx[i].second; + cutoff_len += 1; + if (cum_prob >= cutoff_prob || cutoff_len >= cutoff_top_n) + break; + } + } + prob_idx = std::vector>( + prob_idx.begin(), prob_idx.begin() + cutoff_len); + } + std::vector> log_prob_idx; + for (size_t i = 0; i < cutoff_len; ++i) { + log_prob_idx.push_back(std::pair( + prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN))); + } + return log_prob_idx; +} + + +std::vector> get_beam_search_result( + const std::vector &prefixes, + const std::vector &vocabulary, + size_t beam_size) { + // allow for the post processing + std::vector space_prefixes; + if (space_prefixes.empty()) { + for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { + space_prefixes.push_back(prefixes[i]); + } + } + + std::sort(space_prefixes.begin(), space_prefixes.end(), prefix_compare); + std::vector> output_vecs; + for (size_t i = 0; i < beam_size && i < space_prefixes.size(); ++i) { + std::vector output; + space_prefixes[i]->get_path_vec(output); + // convert index to string + std::string output_str; + for (size_t j = 0; j < output.size(); j++) { + std::string ch = vocabulary[output[j]]; + output_str += (ch == kSPACE) ? tSPACE : ch; + } + std::pair output_pair( + -space_prefixes[i]->approx_ctc, output_str); + output_vecs.emplace_back(output_pair); + } + + return output_vecs; +} + +size_t get_utf8_str_len(const std::string &str) { + size_t str_len = 0; + for (char c : str) { + str_len += ((c & 0xc0) != 0x80); + } + return str_len; +} + +std::vector split_utf8_str(const std::string &str) { + std::vector result; + std::string out_str; + + for (char c : str) { + if ((c & 0xc0) != 0x80) // new UTF-8 character + { + if (!out_str.empty()) { + result.push_back(out_str); + out_str.clear(); + } + } + + out_str.append(1, c); + } + result.push_back(out_str); + return result; +} + +std::vector split_str(const std::string &s, + const std::string &delim) { + std::vector result; + std::size_t start = 0, delim_len = delim.size(); + while (true) { + std::size_t end = s.find(delim, start); + if (end == std::string::npos) { + if (start < s.size()) { + result.push_back(s.substr(start)); + } + break; + } + if (end > start) { + result.push_back(s.substr(start, end - start)); + } + start = end + delim_len; + } + return result; +} + +bool prefix_compare(const PathTrie *x, const PathTrie *y) { + if (x->score == y->score) { + if (x->character == y->character) { + return false; + } else { + return (x->character < y->character); + } + } else { + return x->score > y->score; + } +} + +void add_word_to_fst(const std::vector &word, + fst::StdVectorFst *dictionary) { + if (dictionary->NumStates() == 0) { + fst::StdVectorFst::StateId start = dictionary->AddState(); + assert(start == 0); + dictionary->SetStart(start); + } + fst::StdVectorFst::StateId src = dictionary->Start(); + fst::StdVectorFst::StateId dst; + for (auto c : word) { + dst = dictionary->AddState(); + dictionary->AddArc(src, fst::StdArc(c, c, 0, dst)); + src = dst; + } + dictionary->SetFinal(dst, fst::StdArc::Weight::One()); +} + +bool add_word_to_dictionary( + const std::string &word, + const std::unordered_map &char_map, + bool add_space, + int SPACE_ID, + fst::StdVectorFst *dictionary) { + auto characters = split_utf8_str(word); + + std::vector int_word; + + for (auto &c : characters) { + if (c == " ") { + int_word.push_back(SPACE_ID); + } else { + auto int_c = char_map.find(c); + if (int_c != char_map.end()) { + int_word.push_back(int_c->second); + } else { + return false; // return without adding + } + } + } + + if (add_space) { + int_word.push_back(SPACE_ID); + } + + add_word_to_fst(int_word, dictionary); + return true; // return with successful adding +} diff --git a/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h new file mode 100644 index 00000000..09874155 --- /dev/null +++ b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h @@ -0,0 +1,111 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DECODER_UTILS_H_ +#define DECODER_UTILS_H_ + +#include +#include +#include "fst/log.h" +#include "path_trie.h" + +const std::string kSPACE = ""; +const std::string tSPACE = " "; +const float NUM_FLT_INF = std::numeric_limits::max(); +const float NUM_FLT_MIN = std::numeric_limits::min(); + +// inline function for validation check +inline void check( + bool x, const char *expr, const char *file, int line, const char *err) { + if (!x) { + std::cout << "[" << file << ":" << line << "] "; + LOG(FATAL) << "\"" << expr << "\" check failed. " << err; + } +} + +#define VALID_CHECK(x, info) \ + check(static_cast(x), #x, __FILE__, __LINE__, info) +#define VALID_CHECK_EQ(x, y, info) VALID_CHECK((x) == (y), info) +#define VALID_CHECK_GT(x, y, info) VALID_CHECK((x) > (y), info) +#define VALID_CHECK_LT(x, y, info) VALID_CHECK((x) < (y), info) + + +// Function template for comparing two pairs +template +bool pair_comp_first_rev(const std::pair &a, + const std::pair &b) { + return a.first > b.first; +} + +// Function template for comparing two pairs +template +bool pair_comp_second_rev(const std::pair &a, + const std::pair &b) { + return a.second > b.second; +} + +// Return the sum of two probabilities in log scale +template +T log_sum_exp(const T &x, const T &y) { + static T num_min = -std::numeric_limits::max(); + if (x <= num_min) return y; + if (y <= num_min) return x; + T xmax = std::max(x, y); + return std::log(std::exp(x - xmax) + std::exp(y - xmax)) + xmax; +} + +// Get pruned probability vector for each time step's beam search +std::vector> get_pruned_log_probs( + const std::vector &prob_step, + double cutoff_prob, + size_t cutoff_top_n); + +// Get beam search result from prefixes in trie tree +std::vector> get_beam_search_result( + const std::vector &prefixes, + const std::vector &vocabulary, + size_t beam_size); + +// Functor for prefix comparsion +bool prefix_compare(const PathTrie *x, const PathTrie *y); + +/* Get length of utf8 encoding string + * See: http://stackoverflow.com/a/4063229 + */ +size_t get_utf8_str_len(const std::string &str); + +/* Split a string into a list of strings on a given string + * delimiter. NB: delimiters on beginning / end of string are + * trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"]. + */ +std::vector split_str(const std::string &s, + const std::string &delim); + +/* Splits string into vector of strings representing + * UTF-8 characters (not same as chars) + */ +std::vector split_utf8_str(const std::string &str); + +// Add a word in index to the dicionary of fst +void add_word_to_fst(const std::vector &word, + fst::StdVectorFst *dictionary); + +// Add a word in string to dictionary +bool add_word_to_dictionary( + const std::string &word, + const std::unordered_map &char_map, + bool add_space, + int SPACE_ID, + fst::StdVectorFst *dictionary); +#endif // DECODER_UTILS_H diff --git a/speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp new file mode 100644 index 00000000..777ca052 --- /dev/null +++ b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp @@ -0,0 +1,164 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "path_trie.h" + +#include +#include +#include +#include +#include + +#include "decoder_utils.h" + +PathTrie::PathTrie() { + log_prob_b_prev = -NUM_FLT_INF; + log_prob_nb_prev = -NUM_FLT_INF; + log_prob_b_cur = -NUM_FLT_INF; + log_prob_nb_cur = -NUM_FLT_INF; + score = -NUM_FLT_INF; + + ROOT_ = -1; + character = ROOT_; + exists_ = true; + parent = nullptr; + + dictionary_ = nullptr; + dictionary_state_ = 0; + has_dictionary_ = false; + + matcher_ = nullptr; +} + +PathTrie::~PathTrie() { + for (auto child : children_) { + delete child.second; + child.second = nullptr; + } +} + +PathTrie* PathTrie::get_path_trie(int new_char, bool reset) { + auto child = children_.begin(); + for (child = children_.begin(); child != children_.end(); ++child) { + if (child->first == new_char) { + break; + } + } + if (child != children_.end()) { + if (!child->second->exists_) { + child->second->exists_ = true; + child->second->log_prob_b_prev = -NUM_FLT_INF; + child->second->log_prob_nb_prev = -NUM_FLT_INF; + child->second->log_prob_b_cur = -NUM_FLT_INF; + child->second->log_prob_nb_cur = -NUM_FLT_INF; + } + return (child->second); + } else { + if (has_dictionary_) { + matcher_->SetState(dictionary_state_); + bool found = matcher_->Find(new_char + 1); + if (!found) { + // Adding this character causes word outside dictionary + auto FSTZERO = fst::TropicalWeight::Zero(); + auto final_weight = dictionary_->Final(dictionary_state_); + bool is_final = (final_weight != FSTZERO); + if (is_final && reset) { + dictionary_state_ = dictionary_->Start(); + } + return nullptr; + } else { + PathTrie* new_path = new PathTrie; + new_path->character = new_char; + new_path->parent = this; + new_path->dictionary_ = dictionary_; + new_path->dictionary_state_ = matcher_->Value().nextstate; + new_path->has_dictionary_ = true; + new_path->matcher_ = matcher_; + children_.push_back(std::make_pair(new_char, new_path)); + return new_path; + } + } else { + PathTrie* new_path = new PathTrie; + new_path->character = new_char; + new_path->parent = this; + children_.push_back(std::make_pair(new_char, new_path)); + return new_path; + } + } +} + +PathTrie* PathTrie::get_path_vec(std::vector& output) { + return get_path_vec(output, ROOT_); +} + +PathTrie* PathTrie::get_path_vec(std::vector& output, + int stop, + size_t max_steps) { + if (character == stop || character == ROOT_ || output.size() == max_steps) { + std::reverse(output.begin(), output.end()); + return this; + } else { + output.push_back(character); + return parent->get_path_vec(output, stop, max_steps); + } +} + +void PathTrie::iterate_to_vec(std::vector& output) { + if (exists_) { + log_prob_b_prev = log_prob_b_cur; + log_prob_nb_prev = log_prob_nb_cur; + + log_prob_b_cur = -NUM_FLT_INF; + log_prob_nb_cur = -NUM_FLT_INF; + + score = log_sum_exp(log_prob_b_prev, log_prob_nb_prev); + output.push_back(this); + } + for (auto child : children_) { + child.second->iterate_to_vec(output); + } +} + +void PathTrie::remove() { + exists_ = false; + if (children_.size() == 0) { + if (parent != nullptr) { + auto child = parent->children_.begin(); + for (child = parent->children_.begin(); + child != parent->children_.end(); + ++child) { + if (child->first == character) { + parent->children_.erase(child); + break; + } + } + if (parent->children_.size() == 0 && !parent->exists_) { + parent->remove(); + } + } + delete this; + } +} + + +void PathTrie::set_dictionary(fst::StdVectorFst* dictionary) { + dictionary_ = dictionary; + dictionary_state_ = dictionary->Start(); + has_dictionary_ = true; +} + +using FSTMATCH = fst::SortedMatcher; +void PathTrie::set_matcher(std::shared_ptr matcher) { + matcher_ = matcher; +} diff --git a/speechx/speechx/asr/decoder/ctc_decoders/path_trie.h b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.h new file mode 100644 index 00000000..5193e0a4 --- /dev/null +++ b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.h @@ -0,0 +1,82 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef PATH_TRIE_H +#define PATH_TRIE_H + +#include +#include +#include +#include +#include + +#include "fst/fstlib.h" + +/* Trie tree for prefix storing and manipulating, with a dictionary in + * finite-state transducer for spelling correction. + */ +class PathTrie { + public: + PathTrie(); + ~PathTrie(); + + // get new prefix after appending new char + PathTrie* get_path_trie(int new_char, bool reset = true); + + // get the prefix in index from root to current node + PathTrie* get_path_vec(std::vector& output); + + // get the prefix in index from some stop node to current nodel + PathTrie* get_path_vec( + std::vector& output, + int stop, + size_t max_steps = std::numeric_limits::max()); + + // update log probs + void iterate_to_vec(std::vector& output); + + // set dictionary for FST + void set_dictionary(fst::StdVectorFst* dictionary); + + void set_matcher(std::shared_ptr>); + + bool is_empty() { return ROOT_ == character; } + + // remove current path from root + void remove(); + + float log_prob_b_prev; + float log_prob_nb_prev; + float log_prob_b_cur; + float log_prob_nb_cur; + float score; + float approx_ctc; + int character; + PathTrie* parent; + + private: + int ROOT_; + bool exists_; + bool has_dictionary_; + + std::vector> children_; + + // pointer to dictionary of FST + fst::StdVectorFst* dictionary_; + fst::StdVectorFst::StateId dictionary_state_; + // true if finding ars in FST + std::shared_ptr> matcher_; +}; + +#endif // PATH_TRIE_H diff --git a/speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp b/speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp new file mode 100644 index 00000000..6e7f68cf --- /dev/null +++ b/speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp @@ -0,0 +1,232 @@ +// Licensed under GNU Lesser General Public License v3 (LGPLv3) (LGPL-3) (the +// "COPYING.LESSER.3"); + +#include "scorer.h" + +#include +#include + +#include "lm/config.hh" +#include "lm/model.hh" +#include "lm/state.hh" + +#include "decoder_utils.h" + +using namespace lm::ngram; +// if your platform is windows ,you need add the define +#define F_OK 0 +Scorer::Scorer(double alpha, + double beta, + const std::string& lm_path, + const std::vector& vocab_list) { + this->alpha = alpha; + this->beta = beta; + + dictionary = nullptr; + is_character_based_ = true; + language_model_ = nullptr; + + max_order_ = 0; + dict_size_ = 0; + SPACE_ID_ = -1; + + setup(lm_path, vocab_list); +} + +Scorer::~Scorer() { + if (language_model_ != nullptr) { + delete static_cast(language_model_); + } + if (dictionary != nullptr) { + delete static_cast(dictionary); + } +} + +void Scorer::setup(const std::string& lm_path, + const std::vector& vocab_list) { + // load language model + load_lm(lm_path); + // set char map for scorer + set_char_map(vocab_list); + // fill the dictionary for FST + if (!is_character_based()) { + fill_dictionary(true); + } +} + +void Scorer::load_lm(const std::string& lm_path) { + const char* filename = lm_path.c_str(); + VALID_CHECK_EQ(access(filename, F_OK), 0, "Invalid language model path"); + + RetriveStrEnumerateVocab enumerate; + lm::ngram::Config config; + config.enumerate_vocab = &enumerate; + language_model_ = lm::ngram::LoadVirtual(filename, config); + max_order_ = static_cast(language_model_)->Order(); + vocabulary_ = enumerate.vocabulary; + for (size_t i = 0; i < vocabulary_.size(); ++i) { + if (is_character_based_ && vocabulary_[i] != UNK_TOKEN && + vocabulary_[i] != START_TOKEN && vocabulary_[i] != END_TOKEN && + get_utf8_str_len(enumerate.vocabulary[i]) > 1) { + is_character_based_ = false; + } + } +} + +double Scorer::get_log_cond_prob(const std::vector& words) { + lm::base::Model* model = static_cast(language_model_); + double cond_prob; + lm::ngram::State state, tmp_state, out_state; + // avoid to inserting in begin + model->NullContextWrite(&state); + for (size_t i = 0; i < words.size(); ++i) { + lm::WordIndex word_index = model->BaseVocabulary().Index(words[i]); + // encounter OOV + if (word_index == 0) { + return OOV_SCORE; + } + cond_prob = model->BaseScore(&state, word_index, &out_state); + tmp_state = state; + state = out_state; + out_state = tmp_state; + } + // return log10 prob + return cond_prob; +} + +double Scorer::get_sent_log_prob(const std::vector& words) { + std::vector sentence; + if (words.size() == 0) { + for (size_t i = 0; i < max_order_; ++i) { + sentence.push_back(START_TOKEN); + } + } else { + for (size_t i = 0; i < max_order_ - 1; ++i) { + sentence.push_back(START_TOKEN); + } + sentence.insert(sentence.end(), words.begin(), words.end()); + } + sentence.push_back(END_TOKEN); + return get_log_prob(sentence); +} + +double Scorer::get_log_prob(const std::vector& words) { + assert(words.size() > max_order_); + double score = 0.0; + for (size_t i = 0; i < words.size() - max_order_ + 1; ++i) { + std::vector ngram(words.begin() + i, + words.begin() + i + max_order_); + score += get_log_cond_prob(ngram); + } + return score; +} + +void Scorer::reset_params(float alpha, float beta) { + this->alpha = alpha; + this->beta = beta; +} + +std::string Scorer::vec2str(const std::vector& input) { + std::string word; + for (auto ind : input) { + word += char_list_[ind]; + } + return word; +} + +std::vector Scorer::split_labels(const std::vector& labels) { + if (labels.empty()) return {}; + + std::string s = vec2str(labels); + std::vector words; + if (is_character_based_) { + words = split_utf8_str(s); + } else { + words = split_str(s, " "); + } + return words; +} + +void Scorer::set_char_map(const std::vector& char_list) { + char_list_ = char_list; + char_map_.clear(); + + // Set the char map for the FST for spelling correction + for (size_t i = 0; i < char_list_.size(); i++) { + if (char_list_[i] == kSPACE) { + SPACE_ID_ = i; + } + // The initial state of FST is state 0, hence the index of chars in + // the FST should start from 1 to avoid the conflict with the initial + // state, otherwise wrong decoding results would be given. + char_map_[char_list_[i]] = i + 1; + } +} + +std::vector Scorer::make_ngram(PathTrie* prefix) { + std::vector ngram; + PathTrie* current_node = prefix; + PathTrie* new_node = nullptr; + + for (int order = 0; order < max_order_; order++) { + std::vector prefix_vec; + + if (is_character_based_) { + new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_, 1); + current_node = new_node; + } else { + new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_); + current_node = new_node->parent; // Skipping spaces + } + + // reconstruct word + std::string word = vec2str(prefix_vec); + ngram.push_back(word); + + if (new_node->character == -1) { + // No more spaces, but still need order + for (int i = 0; i < max_order_ - order - 1; i++) { + ngram.push_back(START_TOKEN); + } + break; + } + } + std::reverse(ngram.begin(), ngram.end()); + return ngram; +} + +void Scorer::fill_dictionary(bool add_space) { + fst::StdVectorFst dictionary; + // For each unigram convert to ints and put in trie + int dict_size = 0; + for (const auto& word : vocabulary_) { + bool added = add_word_to_dictionary( + word, char_map_, add_space, SPACE_ID_ + 1, &dictionary); + dict_size += added ? 1 : 0; + } + + dict_size_ = dict_size; + + /* Simplify FST + + * This gets rid of "epsilon" transitions in the FST. + * These are transitions that don't require a string input to be taken. + * Getting rid of them is necessary to make the FST deterministic, but + * can greatly increase the size of the FST + */ + fst::RmEpsilon(&dictionary); + fst::StdVectorFst* new_dict = new fst::StdVectorFst; + + /* This makes the FST deterministic, meaning for any string input there's + * only one possible state the FST could be in. It is assumed our + * dictionary is deterministic when using it. + * (lest we'd have to check for multiple transitions at each state) + */ + fst::Determinize(dictionary, new_dict); + + /* Finds the simplest equivalent fst. This is unnecessary but decreases + * memory usage of the dictionary + */ + fst::Minimize(new_dict); + this->dictionary = new_dict; +} diff --git a/speechx/speechx/asr/decoder/ctc_decoders/scorer.h b/speechx/speechx/asr/decoder/ctc_decoders/scorer.h new file mode 100644 index 00000000..08e109b7 --- /dev/null +++ b/speechx/speechx/asr/decoder/ctc_decoders/scorer.h @@ -0,0 +1,114 @@ +// Licensed under GNU Lesser General Public License v3 (LGPLv3) (LGPL-3) (the +// "COPYING.LESSER.3"); + +#ifndef SCORER_H_ +#define SCORER_H_ + +#include +#include +#include +#include + +#include "lm/enumerate_vocab.hh" +#include "lm/virtual_interface.hh" +#include "lm/word_index.hh" + +#include "path_trie.h" + +const double OOV_SCORE = -1000.0; +const std::string START_TOKEN = ""; +const std::string UNK_TOKEN = ""; +const std::string END_TOKEN = ""; + +// Implement a callback to retrive the dictionary of language model. +class RetriveStrEnumerateVocab : public lm::EnumerateVocab { + public: + RetriveStrEnumerateVocab() {} + + void Add(lm::WordIndex index, const StringPiece &str) { + vocabulary.push_back(std::string(str.data(), str.length())); + } + + std::vector vocabulary; +}; + +/* External scorer to query score for n-gram or sentence, including language + * model scoring and word insertion. + * + * Example: + * Scorer scorer(alpha, beta, "path_of_language_model"); + * scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" }); + * scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" }); + */ +class Scorer { + public: + Scorer(double alpha, + double beta, + const std::string &lm_path, + const std::vector &vocabulary); + ~Scorer(); + + double get_log_cond_prob(const std::vector &words); + + double get_sent_log_prob(const std::vector &words); + + // return the max order + size_t get_max_order() const { return max_order_; } + + // return the dictionary size of language model + size_t get_dict_size() const { return dict_size_; } + + // retrun true if the language model is character based + bool is_character_based() const { return is_character_based_; } + + // reset params alpha & beta + void reset_params(float alpha, float beta); + + // make ngram for a given prefix + std::vector make_ngram(PathTrie *prefix); + + // trransform the labels in index to the vector of words (word based lm) or + // the vector of characters (character based lm) + std::vector split_labels(const std::vector &labels); + + // language model weight + double alpha; + // word insertion weight + double beta; + + // pointer to the dictionary of FST + void *dictionary; + + protected: + // necessary setup: load language model, set char map, fill FST's dictionary + void setup(const std::string &lm_path, + const std::vector &vocab_list); + + // load language model from given path + void load_lm(const std::string &lm_path); + + // fill dictionary for FST + void fill_dictionary(bool add_space); + + // set char map + void set_char_map(const std::vector &char_list); + + double get_log_prob(const std::vector &words); + + // translate the vector in index to string + std::string vec2str(const std::vector &input); + + private: + void *language_model_; + bool is_character_based_; + size_t max_order_; + size_t dict_size_; + + int SPACE_ID_; + std::vector char_list_; + std::unordered_map char_map_; + + std::vector vocabulary_; +}; + +#endif // SCORER_H_ diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.cc similarity index 99% rename from speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc rename to speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.cc index 07e8e560..15dbd7e9 100644 --- a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc +++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.cc @@ -84,7 +84,7 @@ void CTCPrefixBeamSearch::AdvanceDecode( timer.Reset(); std::vector> likelihood; - likelihood.push_back(frame_prob); + likelihood.push_back(std::move(frame_prob)); AdvanceDecoding(likelihood); search_cost += timer.Elapsed(); diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.h similarity index 100% rename from speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h rename to speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.h diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder_main.cc b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc similarity index 100% rename from speechx/speechx/decoder/ctc_prefix_beam_search_decoder_main.cc rename to speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_score.h b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_score.h similarity index 100% rename from speechx/speechx/decoder/ctc_prefix_beam_search_score.h rename to speechx/speechx/asr/decoder/ctc_prefix_beam_search_score.h diff --git a/speechx/speechx/decoder/ctc_tlg_decoder.cc b/speechx/speechx/asr/decoder/ctc_tlg_decoder.cc similarity index 100% rename from speechx/speechx/decoder/ctc_tlg_decoder.cc rename to speechx/speechx/asr/decoder/ctc_tlg_decoder.cc diff --git a/speechx/speechx/decoder/ctc_tlg_decoder.h b/speechx/speechx/asr/decoder/ctc_tlg_decoder.h similarity index 100% rename from speechx/speechx/decoder/ctc_tlg_decoder.h rename to speechx/speechx/asr/decoder/ctc_tlg_decoder.h diff --git a/speechx/speechx/decoder/ctc_tlg_decoder_main.cc b/speechx/speechx/asr/decoder/ctc_tlg_decoder_main.cc similarity index 100% rename from speechx/speechx/decoder/ctc_tlg_decoder_main.cc rename to speechx/speechx/asr/decoder/ctc_tlg_decoder_main.cc diff --git a/speechx/speechx/decoder/decoder_itf.h b/speechx/speechx/asr/decoder/decoder_itf.h similarity index 100% rename from speechx/speechx/decoder/decoder_itf.h rename to speechx/speechx/asr/decoder/decoder_itf.h diff --git a/speechx/speechx/decoder/nnet_logprob_decoder_main.cc b/speechx/speechx/asr/decoder/nnet_logprob_decoder_main.cc similarity index 100% rename from speechx/speechx/decoder/nnet_logprob_decoder_main.cc rename to speechx/speechx/asr/decoder/nnet_logprob_decoder_main.cc diff --git a/speechx/speechx/decoder/param.h b/speechx/speechx/asr/decoder/param.h similarity index 100% rename from speechx/speechx/decoder/param.h rename to speechx/speechx/asr/decoder/param.h diff --git a/speechx/speechx/nnet/CMakeLists.txt b/speechx/speechx/asr/nnet/CMakeLists.txt similarity index 100% rename from speechx/speechx/nnet/CMakeLists.txt rename to speechx/speechx/asr/nnet/CMakeLists.txt diff --git a/speechx/speechx/nnet/decodable.cc b/speechx/speechx/asr/nnet/decodable.cc similarity index 100% rename from speechx/speechx/nnet/decodable.cc rename to speechx/speechx/asr/nnet/decodable.cc diff --git a/speechx/speechx/nnet/decodable.h b/speechx/speechx/asr/nnet/decodable.h similarity index 100% rename from speechx/speechx/nnet/decodable.h rename to speechx/speechx/asr/nnet/decodable.h diff --git a/speechx/speechx/nnet/ds2_nnet.cc b/speechx/speechx/asr/nnet/ds2_nnet.cc similarity index 100% rename from speechx/speechx/nnet/ds2_nnet.cc rename to speechx/speechx/asr/nnet/ds2_nnet.cc diff --git a/speechx/speechx/nnet/ds2_nnet.h b/speechx/speechx/asr/nnet/ds2_nnet.h similarity index 100% rename from speechx/speechx/nnet/ds2_nnet.h rename to speechx/speechx/asr/nnet/ds2_nnet.h diff --git a/speechx/speechx/nnet/ds2_nnet_main.cc b/speechx/speechx/asr/nnet/ds2_nnet_main.cc similarity index 100% rename from speechx/speechx/nnet/ds2_nnet_main.cc rename to speechx/speechx/asr/nnet/ds2_nnet_main.cc diff --git a/speechx/speechx/nnet/nnet_itf.h b/speechx/speechx/asr/nnet/nnet_itf.h similarity index 100% rename from speechx/speechx/nnet/nnet_itf.h rename to speechx/speechx/asr/nnet/nnet_itf.h diff --git a/speechx/speechx/nnet/u2_nnet.cc b/speechx/speechx/asr/nnet/u2_nnet.cc similarity index 100% rename from speechx/speechx/nnet/u2_nnet.cc rename to speechx/speechx/asr/nnet/u2_nnet.cc diff --git a/speechx/speechx/nnet/u2_nnet.h b/speechx/speechx/asr/nnet/u2_nnet.h similarity index 100% rename from speechx/speechx/nnet/u2_nnet.h rename to speechx/speechx/asr/nnet/u2_nnet.h diff --git a/speechx/speechx/nnet/u2_nnet_main.cc b/speechx/speechx/asr/nnet/u2_nnet_main.cc similarity index 100% rename from speechx/speechx/nnet/u2_nnet_main.cc rename to speechx/speechx/asr/nnet/u2_nnet_main.cc diff --git a/speechx/speechx/recognizer/CMakeLists.txt b/speechx/speechx/asr/recognizer/CMakeLists.txt similarity index 100% rename from speechx/speechx/recognizer/CMakeLists.txt rename to speechx/speechx/asr/recognizer/CMakeLists.txt diff --git a/speechx/speechx/recognizer/recognizer.cc b/speechx/speechx/asr/recognizer/recognizer.cc similarity index 100% rename from speechx/speechx/recognizer/recognizer.cc rename to speechx/speechx/asr/recognizer/recognizer.cc diff --git a/speechx/speechx/recognizer/recognizer.h b/speechx/speechx/asr/recognizer/recognizer.h similarity index 100% rename from speechx/speechx/recognizer/recognizer.h rename to speechx/speechx/asr/recognizer/recognizer.h diff --git a/speechx/speechx/recognizer/recognizer_main.cc b/speechx/speechx/asr/recognizer/recognizer_main.cc similarity index 100% rename from speechx/speechx/recognizer/recognizer_main.cc rename to speechx/speechx/asr/recognizer/recognizer_main.cc diff --git a/speechx/speechx/recognizer/u2_recognizer.cc b/speechx/speechx/asr/recognizer/u2_recognizer.cc similarity index 100% rename from speechx/speechx/recognizer/u2_recognizer.cc rename to speechx/speechx/asr/recognizer/u2_recognizer.cc diff --git a/speechx/speechx/recognizer/u2_recognizer.h b/speechx/speechx/asr/recognizer/u2_recognizer.h similarity index 100% rename from speechx/speechx/recognizer/u2_recognizer.h rename to speechx/speechx/asr/recognizer/u2_recognizer.h diff --git a/speechx/speechx/recognizer/u2_recognizer_main.cc b/speechx/speechx/asr/recognizer/u2_recognizer_main.cc similarity index 100% rename from speechx/speechx/recognizer/u2_recognizer_main.cc rename to speechx/speechx/asr/recognizer/u2_recognizer_main.cc diff --git a/speechx/speechx/protocol/CMakeLists.txt b/speechx/speechx/asr/server/CMakeLists.txt similarity index 100% rename from speechx/speechx/protocol/CMakeLists.txt rename to speechx/speechx/asr/server/CMakeLists.txt diff --git a/speechx/speechx/protocol/websocket/CMakeLists.txt b/speechx/speechx/asr/server/websocket/CMakeLists.txt similarity index 100% rename from speechx/speechx/protocol/websocket/CMakeLists.txt rename to speechx/speechx/asr/server/websocket/CMakeLists.txt diff --git a/speechx/speechx/protocol/websocket/websocket_client.cc b/speechx/speechx/asr/server/websocket/websocket_client.cc similarity index 100% rename from speechx/speechx/protocol/websocket/websocket_client.cc rename to speechx/speechx/asr/server/websocket/websocket_client.cc diff --git a/speechx/speechx/protocol/websocket/websocket_client.h b/speechx/speechx/asr/server/websocket/websocket_client.h similarity index 100% rename from speechx/speechx/protocol/websocket/websocket_client.h rename to speechx/speechx/asr/server/websocket/websocket_client.h diff --git a/speechx/speechx/protocol/websocket/websocket_client_main.cc b/speechx/speechx/asr/server/websocket/websocket_client_main.cc similarity index 100% rename from speechx/speechx/protocol/websocket/websocket_client_main.cc rename to speechx/speechx/asr/server/websocket/websocket_client_main.cc diff --git a/speechx/speechx/protocol/websocket/websocket_server.cc b/speechx/speechx/asr/server/websocket/websocket_server.cc similarity index 100% rename from speechx/speechx/protocol/websocket/websocket_server.cc rename to speechx/speechx/asr/server/websocket/websocket_server.cc diff --git a/speechx/speechx/protocol/websocket/websocket_server.h b/speechx/speechx/asr/server/websocket/websocket_server.h similarity index 100% rename from speechx/speechx/protocol/websocket/websocket_server.h rename to speechx/speechx/asr/server/websocket/websocket_server.h diff --git a/speechx/speechx/protocol/websocket/websocket_server_main.cc b/speechx/speechx/asr/server/websocket/websocket_server_main.cc similarity index 100% rename from speechx/speechx/protocol/websocket/websocket_server_main.cc rename to speechx/speechx/asr/server/websocket/websocket_server_main.cc diff --git a/speechx/speechx/common/CMakeLists.txt b/speechx/speechx/common/CMakeLists.txt new file mode 100644 index 00000000..dea9eb05 --- /dev/null +++ b/speechx/speechx/common/CMakeLists.txt @@ -0,0 +1,16 @@ +include_directories( +${CMAKE_CURRENT_SOURCE_DIR} +${CMAKE_CURRENT_SOURCE_DIR}/base +) + +include_directories( +${CMAKE_CURRENT_SOURCE_DIR}/../ +${CMAKE_CURRENT_SOURCE_DIR}/utils +) +add_subdirectory(utils) + +include_directories( +${CMAKE_CURRENT_SOURCE_DIR} +${CMAKE_CURRENT_SOURCE_DIR}/frontend +) +add_subdirectory(frontend) diff --git a/speechx/speechx/base/basic_types.h b/speechx/speechx/common/base/basic_types.h similarity index 100% rename from speechx/speechx/base/basic_types.h rename to speechx/speechx/common/base/basic_types.h diff --git a/speechx/speechx/base/common.h b/speechx/speechx/common/base/common.h similarity index 100% rename from speechx/speechx/base/common.h rename to speechx/speechx/common/base/common.h diff --git a/speechx/speechx/base/flags.h b/speechx/speechx/common/base/flags.h similarity index 100% rename from speechx/speechx/base/flags.h rename to speechx/speechx/common/base/flags.h diff --git a/speechx/speechx/base/log.h b/speechx/speechx/common/base/log.h similarity index 100% rename from speechx/speechx/base/log.h rename to speechx/speechx/common/base/log.h diff --git a/speechx/speechx/base/macros.h b/speechx/speechx/common/base/macros.h similarity index 100% rename from speechx/speechx/base/macros.h rename to speechx/speechx/common/base/macros.h diff --git a/speechx/speechx/base/thread_pool.h b/speechx/speechx/common/base/thread_pool.h similarity index 100% rename from speechx/speechx/base/thread_pool.h rename to speechx/speechx/common/base/thread_pool.h diff --git a/speechx/speechx/frontend/CMakeLists.txt b/speechx/speechx/common/frontend/CMakeLists.txt similarity index 100% rename from speechx/speechx/frontend/CMakeLists.txt rename to speechx/speechx/common/frontend/CMakeLists.txt diff --git a/speechx/speechx/frontend/audio/CMakeLists.txt b/speechx/speechx/common/frontend/audio/CMakeLists.txt similarity index 100% rename from speechx/speechx/frontend/audio/CMakeLists.txt rename to speechx/speechx/common/frontend/audio/CMakeLists.txt diff --git a/speechx/speechx/frontend/audio/assembler.cc b/speechx/speechx/common/frontend/audio/assembler.cc similarity index 100% rename from speechx/speechx/frontend/audio/assembler.cc rename to speechx/speechx/common/frontend/audio/assembler.cc diff --git a/speechx/speechx/frontend/audio/assembler.h b/speechx/speechx/common/frontend/audio/assembler.h similarity index 100% rename from speechx/speechx/frontend/audio/assembler.h rename to speechx/speechx/common/frontend/audio/assembler.h diff --git a/speechx/speechx/frontend/audio/audio_cache.cc b/speechx/speechx/common/frontend/audio/audio_cache.cc similarity index 100% rename from speechx/speechx/frontend/audio/audio_cache.cc rename to speechx/speechx/common/frontend/audio/audio_cache.cc diff --git a/speechx/speechx/frontend/audio/audio_cache.h b/speechx/speechx/common/frontend/audio/audio_cache.h similarity index 100% rename from speechx/speechx/frontend/audio/audio_cache.h rename to speechx/speechx/common/frontend/audio/audio_cache.h diff --git a/speechx/speechx/frontend/audio/cmvn.cc b/speechx/speechx/common/frontend/audio/cmvn.cc similarity index 100% rename from speechx/speechx/frontend/audio/cmvn.cc rename to speechx/speechx/common/frontend/audio/cmvn.cc diff --git a/speechx/speechx/frontend/audio/cmvn.h b/speechx/speechx/common/frontend/audio/cmvn.h similarity index 100% rename from speechx/speechx/frontend/audio/cmvn.h rename to speechx/speechx/common/frontend/audio/cmvn.h diff --git a/speechx/speechx/frontend/audio/cmvn_json2kaldi_main.cc b/speechx/speechx/common/frontend/audio/cmvn_json2kaldi_main.cc similarity index 100% rename from speechx/speechx/frontend/audio/cmvn_json2kaldi_main.cc rename to speechx/speechx/common/frontend/audio/cmvn_json2kaldi_main.cc diff --git a/speechx/speechx/frontend/audio/compute_fbank_main.cc b/speechx/speechx/common/frontend/audio/compute_fbank_main.cc similarity index 100% rename from speechx/speechx/frontend/audio/compute_fbank_main.cc rename to speechx/speechx/common/frontend/audio/compute_fbank_main.cc diff --git a/speechx/speechx/frontend/audio/compute_linear_spectrogram_main.cc b/speechx/speechx/common/frontend/audio/compute_linear_spectrogram_main.cc similarity index 100% rename from speechx/speechx/frontend/audio/compute_linear_spectrogram_main.cc rename to speechx/speechx/common/frontend/audio/compute_linear_spectrogram_main.cc diff --git a/speechx/speechx/frontend/audio/data_cache.h b/speechx/speechx/common/frontend/audio/data_cache.h similarity index 100% rename from speechx/speechx/frontend/audio/data_cache.h rename to speechx/speechx/common/frontend/audio/data_cache.h diff --git a/speechx/speechx/frontend/audio/db_norm.cc b/speechx/speechx/common/frontend/audio/db_norm.cc similarity index 100% rename from speechx/speechx/frontend/audio/db_norm.cc rename to speechx/speechx/common/frontend/audio/db_norm.cc diff --git a/speechx/speechx/frontend/audio/db_norm.h b/speechx/speechx/common/frontend/audio/db_norm.h similarity index 100% rename from speechx/speechx/frontend/audio/db_norm.h rename to speechx/speechx/common/frontend/audio/db_norm.h diff --git a/speechx/speechx/frontend/audio/fbank.cc b/speechx/speechx/common/frontend/audio/fbank.cc similarity index 100% rename from speechx/speechx/frontend/audio/fbank.cc rename to speechx/speechx/common/frontend/audio/fbank.cc diff --git a/speechx/speechx/frontend/audio/fbank.h b/speechx/speechx/common/frontend/audio/fbank.h similarity index 100% rename from speechx/speechx/frontend/audio/fbank.h rename to speechx/speechx/common/frontend/audio/fbank.h diff --git a/speechx/speechx/frontend/audio/feature_cache.cc b/speechx/speechx/common/frontend/audio/feature_cache.cc similarity index 100% rename from speechx/speechx/frontend/audio/feature_cache.cc rename to speechx/speechx/common/frontend/audio/feature_cache.cc diff --git a/speechx/speechx/frontend/audio/feature_cache.h b/speechx/speechx/common/frontend/audio/feature_cache.h similarity index 100% rename from speechx/speechx/frontend/audio/feature_cache.h rename to speechx/speechx/common/frontend/audio/feature_cache.h diff --git a/speechx/speechx/frontend/audio/feature_common.h b/speechx/speechx/common/frontend/audio/feature_common.h similarity index 100% rename from speechx/speechx/frontend/audio/feature_common.h rename to speechx/speechx/common/frontend/audio/feature_common.h diff --git a/speechx/speechx/frontend/audio/feature_common_inl.h b/speechx/speechx/common/frontend/audio/feature_common_inl.h similarity index 100% rename from speechx/speechx/frontend/audio/feature_common_inl.h rename to speechx/speechx/common/frontend/audio/feature_common_inl.h diff --git a/speechx/speechx/frontend/audio/feature_pipeline.cc b/speechx/speechx/common/frontend/audio/feature_pipeline.cc similarity index 100% rename from speechx/speechx/frontend/audio/feature_pipeline.cc rename to speechx/speechx/common/frontend/audio/feature_pipeline.cc diff --git a/speechx/speechx/frontend/audio/feature_pipeline.h b/speechx/speechx/common/frontend/audio/feature_pipeline.h similarity index 100% rename from speechx/speechx/frontend/audio/feature_pipeline.h rename to speechx/speechx/common/frontend/audio/feature_pipeline.h diff --git a/speechx/speechx/frontend/audio/frontend_itf.h b/speechx/speechx/common/frontend/audio/frontend_itf.h similarity index 100% rename from speechx/speechx/frontend/audio/frontend_itf.h rename to speechx/speechx/common/frontend/audio/frontend_itf.h diff --git a/speechx/speechx/frontend/audio/linear_spectrogram.cc b/speechx/speechx/common/frontend/audio/linear_spectrogram.cc similarity index 100% rename from speechx/speechx/frontend/audio/linear_spectrogram.cc rename to speechx/speechx/common/frontend/audio/linear_spectrogram.cc diff --git a/speechx/speechx/frontend/audio/linear_spectrogram.h b/speechx/speechx/common/frontend/audio/linear_spectrogram.h similarity index 100% rename from speechx/speechx/frontend/audio/linear_spectrogram.h rename to speechx/speechx/common/frontend/audio/linear_spectrogram.h diff --git a/speechx/speechx/frontend/audio/mfcc.cc b/speechx/speechx/common/frontend/audio/mfcc.cc similarity index 100% rename from speechx/speechx/frontend/audio/mfcc.cc rename to speechx/speechx/common/frontend/audio/mfcc.cc diff --git a/speechx/speechx/frontend/audio/mfcc.h b/speechx/speechx/common/frontend/audio/mfcc.h similarity index 100% rename from speechx/speechx/frontend/audio/mfcc.h rename to speechx/speechx/common/frontend/audio/mfcc.h diff --git a/speechx/speechx/frontend/audio/normalizer.h b/speechx/speechx/common/frontend/audio/normalizer.h similarity index 100% rename from speechx/speechx/frontend/audio/normalizer.h rename to speechx/speechx/common/frontend/audio/normalizer.h diff --git a/speechx/speechx/utils/CMakeLists.txt b/speechx/speechx/common/utils/CMakeLists.txt similarity index 100% rename from speechx/speechx/utils/CMakeLists.txt rename to speechx/speechx/common/utils/CMakeLists.txt diff --git a/speechx/speechx/utils/file_utils.cc b/speechx/speechx/common/utils/file_utils.cc similarity index 100% rename from speechx/speechx/utils/file_utils.cc rename to speechx/speechx/common/utils/file_utils.cc diff --git a/speechx/speechx/utils/file_utils.h b/speechx/speechx/common/utils/file_utils.h similarity index 100% rename from speechx/speechx/utils/file_utils.h rename to speechx/speechx/common/utils/file_utils.h diff --git a/speechx/speechx/utils/math.cc b/speechx/speechx/common/utils/math.cc similarity index 100% rename from speechx/speechx/utils/math.cc rename to speechx/speechx/common/utils/math.cc diff --git a/speechx/speechx/utils/math.h b/speechx/speechx/common/utils/math.h similarity index 100% rename from speechx/speechx/utils/math.h rename to speechx/speechx/common/utils/math.h diff --git a/speechx/speechx/decoder/ctc_decoders b/speechx/speechx/decoder/ctc_decoders deleted file mode 120000 index b280de09..00000000 --- a/speechx/speechx/decoder/ctc_decoders +++ /dev/null @@ -1 +0,0 @@ -../../../third_party/ctc_decoders \ No newline at end of file diff --git a/speechx/speechx/frontend/text/CMakeLists.txt b/speechx/speechx/frontend/text/CMakeLists.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/speechx/speechx/kaldi/CMakeLists.txt b/speechx/speechx/kaldi/CMakeLists.txt index ce6b43f6..d27668fc 100644 --- a/speechx/speechx/kaldi/CMakeLists.txt +++ b/speechx/speechx/kaldi/CMakeLists.txt @@ -1,4 +1,7 @@ project(kaldi) +include_directories( +${CMAKE_CURRENT_SOURCE_DIR} +) add_subdirectory(base) add_subdirectory(util) @@ -10,4 +13,4 @@ add_subdirectory(decoder) add_subdirectory(lm) add_subdirectory(fstbin) -add_subdirectory(lmbin) \ No newline at end of file +add_subdirectory(lmbin) diff --git a/speechx/speechx/third_party/CMakeLists.txt b/speechx/speechx/third_party/CMakeLists.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/speechx/speechx/third_party/README.md b/speechx/speechx/third_party/README.md deleted file mode 100644 index 2d620335..00000000 --- a/speechx/speechx/third_party/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# third party - -Those libs copied and developed from third pary opensource software projects. -For all of these things, the official websites are the best place to go.