[engine] rename speechx (#2892)

* rename speechx * fix wfst decode error * replace reset with make_unique
1 year ago · 8e1b4cd513
parent 21183d48b6
commit 8e1b4cd513
298 changed files with 2061 additions and 1953 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -8,7 +8,7 @@ repos:
        entry: yapf
        args: [-i, -vv]
        types: [python]
-        exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$
+        exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$

 -   repo: https://github.com/pre-commit/pre-commit-hooks
    rev: a11d9314b22d8f8c7556443875b731ef05965464
@ -35,7 +35,7 @@ repos:
        -  --ignore=E501,E228,E226,E261,E266,E128,E402,W503
        -  --builtins=G,request
        -  --jobs=1
-        exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$
+        exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$

 -   repo : https://github.com/Lucas-C/pre-commit-hooks
    rev: v1.0.1
@ -57,16 +57,16 @@ repos:
        entry: bash .pre-commit-hooks/clang-format.hook -i
        language: system
        files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$
-        exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders|speechx/speechx/common/utils).*(\.cpp|\.cc|\.h|\.hpp|\.py)$ 
+        exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|runtime/patch|runtime/tools/fstbin|runtime/tools/lmbin|third_party/ctc_decoders|runtime/engine/common/utils).*(\.cpp|\.cc|\.h|\.hpp|\.py)$ 
    -   id: cpplint
        name: cpplint
        description: Static code analysis of C/C++ files
        language: python
        files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$
-        exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders|speechx/speechx/common/utils).*(\.cpp|\.cc|\.h|\.hpp|\.py)$ 
+        exclude: (?=runtime/engine/kaldi|runtime/engine/common/matrix|audio/paddleaudio/src|runtime/patch|runtime/tools/fstbin|runtime/tools/lmbin|third_party/ctc_decoders|runtime/engine/common/utils).*(\.cpp|\.cc|\.h|\.hpp|\.py)$ 
        entry: cpplint --filter=-build,-whitespace,+whitespace/comma,-whitespace/indent
 -   repo: https://github.com/asottile/reorder_python_imports
    rev: v2.4.0
    hooks:
      - id: reorder-python-imports
-        exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h\.hpp|\.py)$
+        exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|runtime/patch|runtime/tools/fstbin|runtime/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h\.hpp|\.py)$
--- a/README.md
+++ b/README.md
@ -164,7 +164,7 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision
 - 👑 2022.11.18: Add [Whisper CLI and Demos](https://github.com/PaddlePaddle/PaddleSpeech/pull/2640), support multi language recognition and translation.
 - 🔥 2022.11.18: Add [Wav2vec2 CLI and Demos](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/speech_ssl), Support ASR and Feature Extraction.
 - 🎉 2022.11.17: Add [male voice for TTS](https://github.com/PaddlePaddle/PaddleSpeech/pull/2660).
- 🔥 2022.11.07: Add [U2/U2++ C++ High Performance Streaming ASR Deployment](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/speechx/examples/u2pp_ol/wenetspeech).
+- 🔥 2022.11.07: Add [U2/U2++ C++ High Performance Streaming ASR Deployment](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/runtime/examples/u2pp_ol/wenetspeech).
 - 👑 2022.11.01: Add [Adversarial Loss](https://arxiv.org/pdf/1907.04448.pdf) for [Chinese English mixed TTS](./examples/zh_en_tts/tts3).
 - 🔥 2022.10.26: Add [Prosody Prediction](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/rhy) for TTS.
 - 🎉 2022.10.21: Add [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) for TTS Chinese Text Frontend.
--- a/runtime/.clang-format
+++ b/runtime/.clang-format
--- a/runtime/.gitignore
+++ b/runtime/.gitignore
@ -1,2 +1,3 @@
 tools/valgrind*
 *log
+fc_patch/*
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@ -93,7 +93,7 @@ endif()

 # paddle libpaddle.so
 # paddle include and link option
-# -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
+# -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
 execute_process(
    COMMAND python -c "\
 import os;\
@ -112,7 +112,7 @@ message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS})
 string(STRIP ${PADDLE_LINK_FLAGS} PADDLE_LINK_FLAGS)

 # paddle compile option
-# -I/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/include
+# -I/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/include
 execute_process(
    COMMAND python -c "\
 import paddle; \
@ -143,6 +143,6 @@ message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
 ###############################################################################
 # Add local library
 ###############################################################################
-set(SPEECHX_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/speechx)
+set(ENGINE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/engine)

-add_subdirectory(speechx)
+add_subdirectory(engine)
--- a/runtime/README.md
+++ b/runtime/README.md
@ -1,4 +1,3 @@
-# SpeechX -- All in One Speech Task Inference 

 ## Environment

@ -9,7 +8,7 @@ We develop under:
 * gcc/g++/gfortran - 8.2.0
 * cmake - 3.16.0

-> Please use `tools/env.sh` to create python `venv`, then `source venv/bin/activate` to build speechx.
+> Please use `tools/env.sh` to create python `venv`, then `source venv/bin/activate` to build engine.

 > We make sure everything works fine under Docker, and recommend using it to develop and deploy.

@ -33,7 +32,7 @@ docker run --privileged  --net=host --ipc=host -it --rm -v /path/to/paddlespeech
 bash tools/venv.sh
 ```

-2. Build `speechx` and `examples`.
+2. Build `engine` and `examples`.

 For now we are using feature under `develop` branch of paddle, so we need to install `paddlepaddle` nightly build version.
 For example: 
--- a/runtime/build.sh
+++ b/runtime/build.sh
--- a/runtime/cmake/EnableCMP0048.cmake
+++ b/runtime/cmake/EnableCMP0048.cmake
--- a/runtime/cmake/FindGFortranLibs.cmake
+++ b/runtime/cmake/FindGFortranLibs.cmake
--- a/runtime/cmake/absl.cmake
+++ b/runtime/cmake/absl.cmake
--- a/runtime/cmake/boost.cmake
+++ b/runtime/cmake/boost.cmake
--- a/runtime/cmake/eigen.cmake
+++ b/runtime/cmake/eigen.cmake
--- a/runtime/cmake/gflags.cmake
+++ b/runtime/cmake/gflags.cmake
@ -2,10 +2,10 @@ include(FetchContent)

 FetchContent_Declare(
  gflags
-  URL      https://github.com/gflags/gflags/archive/v2.2.2.zip
+  URL      https://paddleaudio.bj.bcebos.com/build/gflag-2.2.2.zip 
  URL_HASH SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5
 )
 FetchContent_MakeAvailable(gflags)

 # openfst need
-include_directories(${gflags_BINARY_DIR}/include)
+include_directories(${gflags_BINARY_DIR}/include)
--- a/runtime/cmake/glog.cmake
+++ b/runtime/cmake/glog.cmake
@ -1,7 +1,7 @@
 include(FetchContent)
 FetchContent_Declare(
  glog
-  URL      https://github.com/google/glog/archive/v0.4.0.zip
+  URL      https://paddleaudio.bj.bcebos.com/build/glog-0.4.0.zip
  URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc
 )
 FetchContent_MakeAvailable(glog)
--- a/runtime/cmake/gtest.cmake
+++ b/runtime/cmake/gtest.cmake
@ -2,7 +2,7 @@
 include(FetchContent)
 FetchContent_Declare(
  gtest
-  URL      https://github.com/google/googletest/archive/release-1.11.0.zip
+  URL      https://paddleaudio.bj.bcebos.com/build/gtest-release-1.11.0.zip
  URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a
 )
 FetchContent_MakeAvailable(gtest)
@ -12,4 +12,4 @@ include_directories(${gtest_BINARY_DIR} ${gtest_SOURCE_DIR}/src)

 if(WITH_TESTING)
  enable_testing()
-endif()
+endif()
--- a/runtime/cmake/kenlm.cmake
+++ b/runtime/cmake/kenlm.cmake
--- a/runtime/cmake/libsndfile.cmake
+++ b/runtime/cmake/libsndfile.cmake
--- a/runtime/cmake/openblas.cmake
+++ b/runtime/cmake/openblas.cmake
--- a/runtime/cmake/openfst.cmake
+++ b/runtime/cmake/openfst.cmake
@ -1,8 +1,8 @@
-include(FetchContent)
 set(openfst_PREFIX_DIR ${fc_patch}/openfst)
 set(openfst_SOURCE_DIR ${fc_patch}/openfst-src)
 set(openfst_BINARY_DIR ${fc_patch}/openfst-build)

+include(FetchContent)
 # openfst Acknowledgments:
 #Cyril Allauzen, Michael Riley, Johan Schalkwyk, Wojciech Skut and Mehryar Mohri, 
 #"OpenFst: A General and Efficient Weighted Finite-State Transducer Library", 
@ -25,5 +25,7 @@ ExternalProject_Add(openfst
 )
 link_directories(${openfst_PREFIX_DIR}/lib)
 include_directories(${openfst_PREFIX_DIR}/include)
+
+
 message(STATUS "OpenFST inc dir: ${openfst_PREFIX_DIR}/include")
-message(STATUS "OpenFST lib dir: ${openfst_PREFIX_DIR}/lib")
+message(STATUS "OpenFST lib dir: ${openfst_PREFIX_DIR}/lib")
--- a/runtime/cmake/paddleinference.cmake
+++ b/runtime/cmake/paddleinference.cmake
--- a/runtime/cmake/system.cmake
+++ b/runtime/cmake/system.cmake
--- a/runtime/docker/.gitkeep
+++ b/runtime/docker/.gitkeep
--- a/speechx/speechx/CMakeLists.txt
+++ b/speechx/speechx/CMakeLists.txt
--- a/speechx/speechx/asr/CMakeLists.txt
+++ b/speechx/speechx/asr/CMakeLists.txt
--- a/speechx/speechx/asr/decoder/CMakeLists.txt
+++ b/speechx/speechx/asr/decoder/CMakeLists.txt
--- a/speechx/speechx/asr/decoder/common.h
+++ b/speechx/speechx/asr/decoder/common.h
--- a/speechx/speechx/asr/decoder/ctc_beam_search_opt.h
+++ b/speechx/speechx/asr/decoder/ctc_beam_search_opt.h
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.cc
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.cc
@ -63,9 +63,7 @@ void CTCPrefixBeamSearch::Reset() {
    times_.emplace_back(empty);
 }

-void CTCPrefixBeamSearch::InitDecoder() { 
-    Reset(); 
-}
+void CTCPrefixBeamSearch::InitDecoder() { Reset(); }

 void CTCPrefixBeamSearch::AdvanceDecode(
    const std::shared_ptr<kaldi::DecodableInterface>& decodable) {
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.h
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_score.h
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_score.h
--- a/speechx/speechx/asr/decoder/ctc_tlg_decoder.cc
+++ b/speechx/speechx/asr/decoder/ctc_tlg_decoder.cc
@ -29,6 +29,11 @@ TLGDecoder::TLGDecoder(TLGDecoderOptions opts) : opts_(opts) {

 void TLGDecoder::Reset() {
    decoder_->InitDecoding();
+    hypotheses_.clear();
+    likelihood_.clear();
+    olabels_.clear();
+    times_.clear();
+
    num_frame_decoded_ = 0;
    return;
 }
@ -103,7 +108,7 @@ void TLGDecoder::FinalizeSearch() {
        time.push_back(idx);  // fake time, todo later
        hypotheses_.push_back(hypothese);
        times_.push_back(time);
-        olabels.push_back(words_id);
+        olabels_.push_back(words_id);
        likelihood_.push_back(-(weight.Value2() + weight.Value1()));
    }
 }
--- a/speechx/speechx/asr/decoder/ctc_tlg_decoder.h
+++ b/speechx/speechx/asr/decoder/ctc_tlg_decoder.h
@ -24,6 +24,7 @@ DECLARE_string(graph_path);
 DECLARE_int32(max_active);
 DECLARE_double(beam);
 DECLARE_double(lattice_beam);
+DECLARE_int32(nbest);

 namespace ppspeech {

@ -46,7 +47,7 @@ struct TLGDecoderOptions {
        decoder_opts.opts.max_active = FLAGS_max_active;
        decoder_opts.opts.beam = FLAGS_beam;
        decoder_opts.opts.lattice_beam = FLAGS_lattice_beam;
-        // decoder_opts.nbest = FLAGS_lattice_nbest;
+        decoder_opts.nbest = FLAGS_nbest;
        LOG(INFO) << "LatticeFasterDecoder max active: "
                  << decoder_opts.opts.max_active;
        LOG(INFO) << "LatticeFasterDecoder beam: " << decoder_opts.opts.beam;
@ -85,7 +86,7 @@ class TLGDecoder : public DecoderBase {
        return hypotheses_;
    }
    const std::vector<std::vector<int>>& Outputs() const override {
-        return olabels;
+        return olabels_;
    }  // outputs_; }
    const std::vector<float>& Likelihood() const override {
        return likelihood_;
@ -111,8 +112,9 @@ class TLGDecoder : public DecoderBase {
  private:
    void AdvanceDecoding(kaldi::DecodableInterface* decodable);

+    int num_frame_decoded_;
    std::vector<std::vector<int>> hypotheses_;
-    std::vector<std::vector<int>> olabels;
+    std::vector<std::vector<int>> olabels_;
    std::vector<float> likelihood_;
    std::vector<std::vector<int>> times_;

@ -123,4 +125,4 @@ class TLGDecoder : public DecoderBase {
 };


-}  // namespace ppspeech
+}  // namespace ppspeech
--- a/speechx/speechx/asr/decoder/ctc_tlg_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/ctc_tlg_decoder_main.cc
--- a/speechx/speechx/asr/decoder/decoder_itf.h
+++ b/speechx/speechx/asr/decoder/decoder_itf.h
--- a/speechx/speechx/asr/decoder/param.h
+++ b/speechx/speechx/asr/decoder/param.h
@ -15,7 +15,6 @@
 #pragma once

 #include "base/common.h"
-//#include "decoder/ctc_tlg_decoder.h"

 // feature
 DEFINE_bool(use_fbank, false, "False for fbank; or linear feature");
--- a/speechx/speechx/asr/nnet/CMakeLists.txt
+++ b/speechx/speechx/asr/nnet/CMakeLists.txt
--- a/speechx/speechx/asr/nnet/decodable.cc
+++ b/speechx/speechx/asr/nnet/decodable.cc
--- a/speechx/speechx/asr/nnet/decodable.h
+++ b/speechx/speechx/asr/nnet/decodable.h
--- a/speechx/speechx/asr/nnet/nnet_itf.h
+++ b/speechx/speechx/asr/nnet/nnet_itf.h
--- a/speechx/speechx/asr/nnet/nnet_producer.cc
+++ b/speechx/speechx/asr/nnet/nnet_producer.cc
--- a/speechx/speechx/asr/nnet/nnet_producer.h
+++ b/speechx/speechx/asr/nnet/nnet_producer.h
--- a/speechx/speechx/asr/nnet/u2_nnet.cc
+++ b/speechx/speechx/asr/nnet/u2_nnet.cc
--- a/speechx/speechx/asr/nnet/u2_nnet.h
+++ b/speechx/speechx/asr/nnet/u2_nnet.h
--- a/speechx/speechx/asr/nnet/u2_nnet_main.cc
+++ b/speechx/speechx/asr/nnet/u2_nnet_main.cc
@ -13,13 +13,13 @@
 // limitations under the License.


+#include "nnet/u2_nnet.h"
 #include "base/common.h"
 #include "decoder/param.h"
 #include "frontend/assembler.h"
 #include "frontend/data_cache.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
-#include "nnet/u2_nnet.h"


 DEFINE_string(feature_rspecifier, "", "test feature rspecifier");
@ -93,9 +93,9 @@ int main(int argc, char* argv[]) {
                    ori_feature_len - chunk_idx * chunk_stride, chunk_size);
            }
            if (this_chunk_size < receptive_field_length) {
-                LOG(WARNING)
-                    << "utt: " << utt << " skip last " << this_chunk_size
-                    << " frames, expect is " << receptive_field_length;
+                LOG(WARNING) << "utt: " << utt << " skip last "
+                             << this_chunk_size << " frames, expect is "
+                             << receptive_field_length;
                break;
            }

--- a/speechx/speechx/asr/nnet/u2_nnet_thread_main.cc
+++ b/speechx/speechx/asr/nnet/u2_nnet_thread_main.cc
@ -13,13 +13,13 @@
 // limitations under the License.


+#include "nnet/u2_nnet.h"
 #include "base/common.h"
 #include "decoder/param.h"
-#include "frontend/wave-reader.h"
 #include "frontend/feature_pipeline.h"
+#include "frontend/wave-reader.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
-#include "nnet/u2_nnet.h"
 #include "nnet/nnet_producer.h"

 DEFINE_string(wav_rspecifier, "", "test wav rspecifier");
@ -104,7 +104,7 @@ int main(int argc, char* argv[]) {
        CHECK(sample_offset == tot_samples);

        std::vector<std::vector<kaldi::BaseFloat>> prob_vec;
-        while(1) {
+        while (1) {
            std::vector<kaldi::BaseFloat> logprobs;
            bool isok = nnet_producer->Read(&logprobs);
            if (nnet_producer->IsFinished()) break;
--- a/speechx/speechx/asr/recognizer/CMakeLists.txt
+++ b/speechx/speechx/asr/recognizer/CMakeLists.txt
--- a/speechx/speechx/asr/recognizer/u2_recognizer.cc
+++ b/speechx/speechx/asr/recognizer/u2_recognizer.cc
@ -33,12 +33,12 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
    decodable_.reset(new Decodable(nnet_producer_, am_scale));

    CHECK_NE(resource.vocab_path, "");
-    if (resource.decoder_opts.tlg_decoder_opts.fst_path == "") {
+    if (resource.decoder_opts.tlg_decoder_opts.fst_path.empty()) {
        LOG(INFO) << resource.decoder_opts.tlg_decoder_opts.fst_path;
-        decoder_.reset(new CTCPrefixBeamSearch(
-            resource.vocab_path, resource.decoder_opts.ctc_prefix_search_opts));
+        decoder_ = std::make_unique<CTCPrefixBeamSearch>(
+            resource.vocab_path, resource.decoder_opts.ctc_prefix_search_opts);
    } else {
-        decoder_.reset(new TLGDecoder(resource.decoder_opts.tlg_decoder_opts));
+        decoder_ = std::make_unique<TLGDecoder>(resource.decoder_opts.tlg_decoder_opts);
    }

    symbol_table_ = decoder_->WordSymbolTable();
@ -268,4 +268,4 @@ void U2Recognizer::SetInputFinished() {
 }


-}  // namespace ppspeech
+}  // namespace ppspeech
--- a/speechx/speechx/asr/recognizer/u2_recognizer.h
+++ b/speechx/speechx/asr/recognizer/u2_recognizer.h
@ -31,11 +31,9 @@ DECLARE_double(rescoring_weight);
 DECLARE_double(reverse_weight);
 DECLARE_int32(nbest);
 DECLARE_int32(blank);
-
 DECLARE_double(acoustic_scale);
 DECLARE_string(vocab_path);
 DECLARE_string(word_symbol_table);
-// DECLARE_string(fst_path);

 namespace ppspeech {

@ -74,10 +72,6 @@ struct DecodeOptions {
        decoder_opts.ctc_prefix_search_opts.blank = FLAGS_blank;
        decoder_opts.ctc_prefix_search_opts.first_beam_size = FLAGS_nbest;
        decoder_opts.ctc_prefix_search_opts.second_beam_size = FLAGS_nbest;
-        // decoder_opts.tlg_decoder_opts.fst_path = "";//FLAGS_fst_path;
-        // decoder_opts.tlg_decoder_opts.word_symbol_table =
-        // FLAGS_word_symbol_table;
-        // decoder_opts.tlg_decoder_opts.nbest = FLAGS_nbest;
        decoder_opts.tlg_decoder_opts =
            ppspeech::TLGDecoderOptions::InitFromFlags();

@ -183,4 +177,4 @@ class U2Recognizer {
    std::thread thread_;
 };

-}  // namespace ppspeech
+}  // namespace ppspeech
--- a/speechx/speechx/asr/recognizer/u2_recognizer_batch_main.cc
+++ b/speechx/speechx/asr/recognizer/u2_recognizer_batch_main.cc
--- a/speechx/speechx/asr/recognizer/u2_recognizer_main.cc
+++ b/speechx/speechx/asr/recognizer/u2_recognizer_main.cc
@ -12,10 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#include "recognizer/u2_recognizer.h"
 #include "decoder/param.h"
 #include "frontend/wave-reader.h"
 #include "kaldi/util/table-types.h"
-#include "recognizer/u2_recognizer.h"

 DEFINE_string(wav_rspecifier, "", "test feature rspecifier");
 DEFINE_string(result_wspecifier, "", "test result wspecifier");
--- a/speechx/speechx/asr/recognizer/u2_recognizer_thread_main.cc
+++ b/speechx/speechx/asr/recognizer/u2_recognizer_thread_main.cc
@ -100,7 +100,7 @@ int main(int argc, char* argv[]) {
            continue;
        }

-        tot_decode_time += local_timer.Elapsed(); 
+        tot_decode_time += local_timer.Elapsed();
        LOG(INFO) << utt << " " << result;
        LOG(INFO) << " RTF: " << local_timer.Elapsed() / dur << " dur: " << dur
                  << " cost: " << local_timer.Elapsed();
--- a/speechx/speechx/asr/server/CMakeLists.txt
+++ b/speechx/speechx/asr/server/CMakeLists.txt
--- a/speechx/speechx/asr/server/websocket/CMakeLists.txt
+++ b/speechx/speechx/asr/server/websocket/CMakeLists.txt
--- a/speechx/speechx/asr/server/websocket/websocket_client.cc
+++ b/speechx/speechx/asr/server/websocket/websocket_client.cc
--- a/speechx/speechx/asr/server/websocket/websocket_client.h
+++ b/speechx/speechx/asr/server/websocket/websocket_client.h
--- a/speechx/speechx/asr/server/websocket/websocket_client_main.cc
+++ b/speechx/speechx/asr/server/websocket/websocket_client_main.cc
@ -12,10 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#include "websocket/websocket_client.h"
 #include "kaldi/feat/wave-reader.h"
 #include "kaldi/util/kaldi-io.h"
 #include "kaldi/util/table-types.h"
-#include "websocket/websocket_client.h"

 DEFINE_string(host, "127.0.0.1", "host of websocket server");
 DEFINE_int32(port, 8082, "port of websocket server");
--- a/speechx/speechx/asr/server/websocket/websocket_server.cc
+++ b/speechx/speechx/asr/server/websocket/websocket_server.cc
--- a/speechx/speechx/asr/server/websocket/websocket_server.h
+++ b/speechx/speechx/asr/server/websocket/websocket_server.h
--- a/speechx/speechx/asr/server/websocket/websocket_server_main.cc
+++ b/speechx/speechx/asr/server/websocket/websocket_server_main.cc
@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "decoder/param.h"
 #include "websocket/websocket_server.h"
+#include "decoder/param.h"

 DEFINE_int32(port, 8082, "websocket listening port");

--- a/speechx/speechx/codelab/CMakeLists.txt
+++ b/speechx/speechx/codelab/CMakeLists.txt
--- a/speechx/speechx/codelab/README.md
+++ b/speechx/speechx/codelab/README.md
--- a/speechx/speechx/codelab/glog/CMakeLists.txt
+++ b/speechx/speechx/codelab/glog/CMakeLists.txt
--- a/speechx/speechx/codelab/glog/README.md
+++ b/speechx/speechx/codelab/glog/README.md
--- a/speechx/speechx/codelab/glog/glog_logtostderr_main.cc
+++ b/speechx/speechx/codelab/glog/glog_logtostderr_main.cc
--- a/speechx/speechx/codelab/glog/glog_main.cc
+++ b/speechx/speechx/codelab/glog/glog_main.cc
--- a/speechx/speechx/common/CMakeLists.txt
+++ b/speechx/speechx/common/CMakeLists.txt
--- a/speechx/speechx/common/base/basic_types.h
+++ b/speechx/speechx/common/base/basic_types.h
@ -28,7 +28,7 @@ typedef int int32;    // NOLINT
 #if defined(__LP64__) && !defined(OS_MACOSX) && !defined(OS_OPENBSD)
 typedef long int64;  // NOLINT
 #else
-typedef long long int64;            // NOLINT
+typedef long long int64;  // NOLINT
 #endif

 typedef unsigned char uint8;    // NOLINT
--- a/speechx/speechx/common/base/common.h
+++ b/speechx/speechx/common/base/common.h
@ -21,6 +21,8 @@
 #include <cstring>
 #include <deque>
 #include <fstream>
+#include <functional>
+#include <future>
 #include <iomanip>
 #include <iostream>
 #include <istream>
@ -42,8 +44,6 @@
 #include <unordered_set>
 #include <utility>
 #include <vector>
-#include <future>
-#include <functional>

 #include "base/basic_types.h"
 #include "base/flags.h"
--- a/speechx/speechx/common/base/flags.h
+++ b/speechx/speechx/common/base/flags.h
--- a/speechx/speechx/common/base/log.h
+++ b/speechx/speechx/common/base/log.h
--- a/speechx/speechx/common/base/macros.h
+++ b/speechx/speechx/common/base/macros.h
--- a/speechx/speechx/common/base/safe_queue.h
+++ b/speechx/speechx/common/base/safe_queue.h
--- a/runtime/engine/common/base/safe_queue_inl.h
+++ b/runtime/engine/common/base/safe_queue_inl.h
--- a/speechx/speechx/common/base/thread_pool.h
+++ b/speechx/speechx/common/base/thread_pool.h
--- a/speechx/speechx/common/frontend/CMakeLists.txt
+++ b/speechx/speechx/common/frontend/CMakeLists.txt
--- a/speechx/speechx/common/frontend/assembler.cc
+++ b/speechx/speechx/common/frontend/assembler.cc
@ -97,8 +97,8 @@ bool Assembler::Compute(vector<BaseFloat>* feats) {
        CHECK(val.size() == dim_) << val.size();

        int32 start = counter * dim_;
-        std::memcpy(feats->data() + start,
-                    val.data(), val.size() * sizeof(BaseFloat));
+        std::memcpy(
+            feats->data() + start, val.data(), val.size() * sizeof(BaseFloat));

        if (this_chunk_size - counter <= cache_size_) {
            feature_cache_.push(val);
--- a/speechx/speechx/common/frontend/assembler.h
+++ b/speechx/speechx/common/frontend/assembler.h
--- a/speechx/speechx/common/frontend/audio_cache.cc
+++ b/speechx/speechx/common/frontend/audio_cache.cc
--- a/speechx/speechx/common/frontend/audio_cache.h
+++ b/speechx/speechx/common/frontend/audio_cache.h
--- a/speechx/speechx/common/frontend/cmvn.cc
+++ b/speechx/speechx/common/frontend/cmvn.cc
@ -84,11 +84,12 @@ void CMVN::Compute(vector<BaseFloat>* feats) const {
    KALDI_ASSERT(feats != NULL);

    if (feats->size() % dim_ != 0) {
-        LOG(ERROR)<< "Dim mismatch: cmvn " << mean_stats_.size() << ','
-                  << var_stats_.size() - 1 << ", feats " << feats->size() << 'x';
+        LOG(ERROR) << "Dim mismatch: cmvn " << mean_stats_.size() << ','
+                   << var_stats_.size() - 1 << ", feats " << feats->size()
+                   << 'x';
    }
    if (var_stats_.size() == 0 && var_norm_) {
-        LOG(ERROR) 
+        LOG(ERROR)
            << "You requested variance normalization but no variance stats_ "
            << "are supplied.";
    }
@ -98,8 +99,8 @@ void CMVN::Compute(vector<BaseFloat>* feats) const {
    // computing an offset and representing it as stats_, we use a count of one.
    if (count < 1.0)
        LOG(ERROR) << "Insufficient stats_ for cepstral mean and variance "
-                     "normalization: "
-                  << "count = " << count;
+                      "normalization: "
+                   << "count = " << count;

    if (!var_norm_) {
        vector<BaseFloat> offset(feats->size());
@ -112,11 +113,12 @@ void CMVN::Compute(vector<BaseFloat>* feats) const {
        // with the dim_ of feature.
        // the dim_ of feats = dim_ * num_frames;
        for (int32 idx = 0; idx < feats->size() / dim_; ++idx) {
-            std::memcpy(mean_stats_apply.data() + dim_ * idx, 
-            mean_stats.data(), dim_* sizeof(double));
+            std::memcpy(mean_stats_apply.data() + dim_ * idx,
+                        mean_stats.data(),
+                        dim_ * sizeof(double));
        }
        for (size_t idx = 0; idx < feats->size(); ++idx) {
-           feats->at(idx) += offset[idx];
+            feats->at(idx) += offset[idx];
        }
        return;
    }
@ -130,7 +132,7 @@ void CMVN::Compute(vector<BaseFloat>* feats) const {
        double var = (var_stats_[d] / count) - mean * mean, floor = 1.0e-20;
        if (var < floor) {
            LOG(WARNING) << "Flooring cepstral variance from " << var << " to "
-                       << floor;
+                         << floor;
            var = floor;
        }
        scale = 1.0 / sqrt(var);
@ -146,7 +148,7 @@ void CMVN::Compute(vector<BaseFloat>* feats) const {
    }
    // Apply the normalization.
    for (size_t idx = 0; idx < feats->size(); ++idx) {
-        feats->at(idx) *= norm1[idx]; 
+        feats->at(idx) *= norm1[idx];
    }

    for (size_t idx = 0; idx < feats->size(); ++idx) {
--- a/speechx/speechx/common/frontend/cmvn.h
+++ b/speechx/speechx/common/frontend/cmvn.h
--- a/speechx/speechx/common/frontend/compute_fbank_main.cc
+++ b/speechx/speechx/common/frontend/compute_fbank_main.cc
--- a/speechx/speechx/common/frontend/compute_linear_spectrogram_main.cc
+++ b/speechx/speechx/common/frontend/compute_linear_spectrogram_main.cc
--- a/speechx/speechx/common/frontend/data_cache.h
+++ b/speechx/speechx/common/frontend/data_cache.h
--- a/speechx/speechx/common/frontend/db_norm.cc
+++ b/speechx/speechx/common/frontend/db_norm.cc
--- a/speechx/speechx/common/frontend/db_norm.h
+++ b/speechx/speechx/common/frontend/db_norm.h
--- a/speechx/speechx/common/frontend/fbank.cc
+++ b/speechx/speechx/common/frontend/fbank.cc
--- a/speechx/speechx/common/frontend/fbank.h
+++ b/speechx/speechx/common/frontend/fbank.h
@ -15,8 +15,8 @@
 #pragma once

 #include "base/common.h"
-#include "frontend/feature_common.h"
 #include "frontend/feature-fbank.h"
+#include "frontend/feature_common.h"

 namespace ppspeech {

--- a/speechx/speechx/common/frontend/feature-fbank.cc
+++ b/speechx/speechx/common/frontend/feature-fbank.cc
--- a/speechx/speechx/common/frontend/feature-fbank.h
+++ b/speechx/speechx/common/frontend/feature-fbank.h
--- a/speechx/speechx/common/frontend/feature-functions.cc
+++ b/speechx/speechx/common/frontend/feature-functions.cc
--- a/speechx/speechx/common/frontend/feature-functions.h
+++ b/speechx/speechx/common/frontend/feature-functions.h
--- a/speechx/speechx/common/frontend/feature-window.cc
+++ b/speechx/speechx/common/frontend/feature-window.cc
--- a/speechx/speechx/common/frontend/feature-window.h
+++ b/speechx/speechx/common/frontend/feature-window.h
--- a/speechx/speechx/common/frontend/feature_cache.cc
+++ b/speechx/speechx/common/frontend/feature_cache.cc
@ -67,7 +67,7 @@ bool FeatureCache::Compute() {

    for (int chunk_idx = 0; chunk_idx < num_chunk; ++chunk_idx) {
        int32 start = chunk_idx * dim_;
-        vector<BaseFloat> feature_chunk(feature.data() + start, 
+        vector<BaseFloat> feature_chunk(feature.data() + start,
                                        feature.data() + start + dim_);
        // feed cache
        cache_.push(feature_chunk);
--- a/speechx/speechx/common/frontend/feature_cache.h
+++ b/speechx/speechx/common/frontend/feature_cache.h
@ -57,7 +57,7 @@ class FeatureCache : public FrontendInterface {
    bool Compute();

    int32 dim_;
-    size_t max_size_;           // cache capacity
+    size_t max_size_;  // cache capacity
    std::unique_ptr<FrontendInterface> base_extractor_;

    std::queue<std::vector<BaseFloat>> cache_;  // feature cache
--- a/speechx/speechx/common/frontend/feature_common.h
+++ b/speechx/speechx/common/frontend/feature_common.h
@ -14,8 +14,8 @@

 #pragma once

-#include "frontend_itf.h"
 #include "frontend/feature-window.h"
+#include "frontend_itf.h"

 namespace ppspeech {

--- a/speechx/speechx/common/frontend/feature_common_inl.h
+++ b/speechx/speechx/common/frontend/feature_common_inl.h
--- a/speechx/speechx/common/frontend/feature_pipeline.cc
+++ b/speechx/speechx/common/frontend/feature_pipeline.cc
--- a/Show More
+++ b/Show More