From a9f4ce47a34bbd62c88090ef9a6e3498dbfc669a Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Fri, 1 Apr 2022 10:24:16 +0000
Subject: [PATCH 1/5] frontend itf

---
 speechx/examples/feat/linear_spectrogram_main.cc   | 12 ++++++------
 speechx/speechx/frontend/audio_cache.h             |  4 ++--
 speechx/speechx/frontend/data_cache.h              |  4 ++--
 speechx/speechx/frontend/fbank.h                   |  4 ++--
 speechx/speechx/frontend/feature_cache.cc          |  2 +-
 speechx/speechx/frontend/feature_cache.h           |  8 ++++----
 .../frontend/feature_extractor_controller.h        | 13 -------------
 .../frontend/feature_extractor_controller_impl.h   | 13 -------------
 ...eature_extractor_interface.h => frontend_itf.h} |  2 +-
 speechx/speechx/frontend/linear_spectrogram.cc     |  2 +-
 speechx/speechx/frontend/linear_spectrogram.h      |  8 ++++----
 speechx/speechx/frontend/normalizer.cc             |  4 ++--
 speechx/speechx/frontend/normalizer.h              | 14 +++++++-------
 speechx/speechx/nnet/decodable.cc                  |  2 +-
 speechx/speechx/nnet/decodable.h                   |  6 +++---
 15 files changed, 36 insertions(+), 62 deletions(-)
 delete mode 100644 speechx/speechx/frontend/feature_extractor_controller.h
 delete mode 100644 speechx/speechx/frontend/feature_extractor_controller_impl.h
 rename speechx/speechx/frontend/{feature_extractor_interface.h => frontend_itf.h} (97%)

diff --git a/speechx/examples/feat/linear_spectrogram_main.cc b/speechx/examples/feat/linear_spectrogram_main.cc
index e1f0a8954..c29d2b21f 100644
--- a/speechx/examples/feat/linear_spectrogram_main.cc
+++ b/speechx/examples/feat/linear_spectrogram_main.cc
@@ -20,7 +20,7 @@
 #include "frontend/audio_cache.h"
 #include "frontend/data_cache.h"
 #include "frontend/feature_cache.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 #include "frontend/normalizer.h"
 #include "kaldi/feat/wave-reader.h"
 #include "kaldi/util/kaldi-io.h"
@@ -170,13 +170,13 @@ int main(int argc, char* argv[]) {
     // feature pipeline: wave cache --> decibel_normalizer --> hanning
     // window -->linear_spectrogram --> global cmvn -> feat cache
 
-    // std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(new
+    // std::unique_ptr<ppspeech::FrontendInterface> data_source(new
     // ppspeech::DataCache());
-    std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(
+    std::unique_ptr<ppspeech::FrontendInterface> data_source(
         new ppspeech::AudioCache());
 
     ppspeech::DecibelNormalizerOptions db_norm_opt;
-    std::unique_ptr<ppspeech::FeatureExtractorInterface> db_norm(
+    std::unique_ptr<ppspeech::FrontendInterface> db_norm(
         new ppspeech::DecibelNormalizer(db_norm_opt, std::move(data_source)));
 
     ppspeech::LinearSpectrogramOptions opt;
@@ -185,10 +185,10 @@ int main(int argc, char* argv[]) {
     LOG(INFO) << "frame length (ms): " << opt.frame_opts.frame_length_ms;
     LOG(INFO) << "frame shift (ms): " << opt.frame_opts.frame_shift_ms;
 
-    std::unique_ptr<ppspeech::FeatureExtractorInterface> linear_spectrogram(
+    std::unique_ptr<ppspeech::FrontendInterface> linear_spectrogram(
         new ppspeech::LinearSpectrogram(opt, std::move(db_norm)));
 
-    std::unique_ptr<ppspeech::FeatureExtractorInterface> cmvn(
+    std::unique_ptr<ppspeech::FrontendInterface> cmvn(
         new ppspeech::CMVN(FLAGS_cmvn_write_path,
                            std::move(linear_spectrogram)));
 
diff --git a/speechx/speechx/frontend/audio_cache.h b/speechx/speechx/frontend/audio_cache.h
index b6c82c69e..f48da12b7 100644
--- a/speechx/speechx/frontend/audio_cache.h
+++ b/speechx/speechx/frontend/audio_cache.h
@@ -16,12 +16,12 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 
 namespace ppspeech {
 
 // waves cache
-class AudioCache : public FeatureExtractorInterface {
+class AudioCache : public FrontendInterface {
   public:
     explicit AudioCache(int buffer_size = kint16max);
 
diff --git a/speechx/speechx/frontend/data_cache.h b/speechx/speechx/frontend/data_cache.h
index dea51d76e..b8ce6bf65 100644
--- a/speechx/speechx/frontend/data_cache.h
+++ b/speechx/speechx/frontend/data_cache.h
@@ -17,13 +17,13 @@
 
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 
 
 namespace ppspeech {
 // A data source for testing different frontend module.
 // It accepts waves or feats.
-class DataCache : public FeatureExtractorInterface {
+class DataCache : public FrontendInterface {
   public:
     explicit DataCache() { finished_ = false; }
 
diff --git a/speechx/speechx/frontend/fbank.h b/speechx/speechx/frontend/fbank.h
index 7d9cf4221..68267b3d0 100644
--- a/speechx/speechx/frontend/fbank.h
+++ b/speechx/speechx/frontend/fbank.h
@@ -20,10 +20,10 @@
 
 namespace ppspeech {
 
-class FbankExtractor : FeatureExtractorInterface {
+class FbankExtractor : FrontendInterface {
   public:
     explicit FbankExtractor(const FbankOptions& opts,
-                            share_ptr<FeatureExtractorInterface> pre_extractor);
+                            share_ptr<FrontendInterface> pre_extractor);
     virtual void AcceptWaveform(
         const kaldi::Vector<kaldi::BaseFloat>& input) = 0;
     virtual void Read(kaldi::Vector<kaldi::BaseFloat>* feat) = 0;
diff --git a/speechx/speechx/frontend/feature_cache.cc b/speechx/speechx/frontend/feature_cache.cc
index dad6907ce..53b7076d5 100644
--- a/speechx/speechx/frontend/feature_cache.cc
+++ b/speechx/speechx/frontend/feature_cache.cc
@@ -24,7 +24,7 @@ using kaldi::SubVector;
 using std::unique_ptr;
 
 FeatureCache::FeatureCache(
-    int max_size, unique_ptr<FeatureExtractorInterface> base_extractor) {
+    int max_size, unique_ptr<FrontendInterface> base_extractor) {
     max_size_ = max_size;
     base_extractor_ = std::move(base_extractor);
 }
diff --git a/speechx/speechx/frontend/feature_cache.h b/speechx/speechx/frontend/feature_cache.h
index f52b9b0f6..1281ec35a 100644
--- a/speechx/speechx/frontend/feature_cache.h
+++ b/speechx/speechx/frontend/feature_cache.h
@@ -15,15 +15,15 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 
 namespace ppspeech {
 
-class FeatureCache : public FeatureExtractorInterface {
+class FeatureCache : public FrontendInterface {
   public:
     explicit FeatureCache(
         int32 max_size = kint16max,
-        std::unique_ptr<FeatureExtractorInterface> base_extractor = NULL);
+        std::unique_ptr<FrontendInterface> base_extractor = NULL);
 
     // Feed feats or waves
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
@@ -53,7 +53,7 @@ class FeatureCache : public FeatureExtractorInterface {
     bool Compute();
 
     size_t max_size_;
-    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
 
     std::mutex mutex_;
     std::queue<kaldi::Vector<BaseFloat>> cache_;
diff --git a/speechx/speechx/frontend/feature_extractor_controller.h b/speechx/speechx/frontend/feature_extractor_controller.h
deleted file mode 100644
index 0544a1e29..000000000
--- a/speechx/speechx/frontend/feature_extractor_controller.h
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
diff --git a/speechx/speechx/frontend/feature_extractor_controller_impl.h b/speechx/speechx/frontend/feature_extractor_controller_impl.h
deleted file mode 100644
index 0544a1e29..000000000
--- a/speechx/speechx/frontend/feature_extractor_controller_impl.h
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
diff --git a/speechx/speechx/frontend/feature_extractor_interface.h b/speechx/speechx/frontend/frontend_itf.h
similarity index 97%
rename from speechx/speechx/frontend/feature_extractor_interface.h
rename to speechx/speechx/frontend/frontend_itf.h
index 5da2526b9..7913cc7c0 100644
--- a/speechx/speechx/frontend/feature_extractor_interface.h
+++ b/speechx/speechx/frontend/frontend_itf.h
@@ -19,7 +19,7 @@
 
 namespace ppspeech {
 
-class FeatureExtractorInterface {
+class FrontendInterface {
   public:
     // Feed inputs: features(2D saved in 1D) or waveforms(1D).
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) = 0;
diff --git a/speechx/speechx/frontend/linear_spectrogram.cc b/speechx/speechx/frontend/linear_spectrogram.cc
index 41bc8743a..2ba00785a 100644
--- a/speechx/speechx/frontend/linear_spectrogram.cc
+++ b/speechx/speechx/frontend/linear_spectrogram.cc
@@ -27,7 +27,7 @@ using std::vector;
 
 LinearSpectrogram::LinearSpectrogram(
     const LinearSpectrogramOptions& opts,
-    std::unique_ptr<FeatureExtractorInterface> base_extractor) {
+    std::unique_ptr<FrontendInterface> base_extractor) {
     opts_ = opts;
     base_extractor_ = std::move(base_extractor);
     int32 window_size = opts.frame_opts.WindowSize();
diff --git a/speechx/speechx/frontend/linear_spectrogram.h b/speechx/speechx/frontend/linear_spectrogram.h
index 10853904d..136441efe 100644
--- a/speechx/speechx/frontend/linear_spectrogram.h
+++ b/speechx/speechx/frontend/linear_spectrogram.h
@@ -16,7 +16,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 #include "kaldi/feat/feature-window.h"
 
 namespace ppspeech {
@@ -35,11 +35,11 @@ struct LinearSpectrogramOptions {
     }
 };
 
-class LinearSpectrogram : public FeatureExtractorInterface {
+class LinearSpectrogram : public FrontendInterface {
   public:
     explicit LinearSpectrogram(
         const LinearSpectrogramOptions& opts,
-        std::unique_ptr<FeatureExtractorInterface> base_extractor);
+        std::unique_ptr<FrontendInterface> base_extractor);
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
     virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
     // the dim_ is the dim of single frame feature
@@ -61,7 +61,7 @@ class LinearSpectrogram : public FeatureExtractorInterface {
     std::vector<kaldi::BaseFloat> hanning_window_;
     kaldi::BaseFloat hanning_window_energy_;
     LinearSpectrogramOptions opts_;
-    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
     int chunk_sample_size_;
     DISALLOW_COPY_AND_ASSIGN(LinearSpectrogram);
 };
diff --git a/speechx/speechx/frontend/normalizer.cc b/speechx/speechx/frontend/normalizer.cc
index 524125619..26f11b692 100644
--- a/speechx/speechx/frontend/normalizer.cc
+++ b/speechx/speechx/frontend/normalizer.cc
@@ -28,7 +28,7 @@ using std::unique_ptr;
 
 DecibelNormalizer::DecibelNormalizer(
     const DecibelNormalizerOptions& opts,
-    std::unique_ptr<FeatureExtractorInterface> base_extractor) {
+    std::unique_ptr<FrontendInterface> base_extractor) {
     base_extractor_ = std::move(base_extractor);
     opts_ = opts;
     dim_ = 1;
@@ -92,7 +92,7 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
 }
 
 CMVN::CMVN(std::string cmvn_file,
-           unique_ptr<FeatureExtractorInterface> base_extractor)
+           unique_ptr<FrontendInterface> base_extractor)
     : var_norm_(true) {
     base_extractor_ = std::move(base_extractor);
     bool binary;
diff --git a/speechx/speechx/frontend/normalizer.h b/speechx/speechx/frontend/normalizer.h
index 352d1e167..df1819612 100644
--- a/speechx/speechx/frontend/normalizer.h
+++ b/speechx/speechx/frontend/normalizer.h
@@ -16,7 +16,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/options-itf.h"
 
@@ -40,11 +40,11 @@ struct DecibelNormalizerOptions {
     }
 };
 
-class DecibelNormalizer : public FeatureExtractorInterface {
+class DecibelNormalizer : public FrontendInterface {
   public:
     explicit DecibelNormalizer(
         const DecibelNormalizerOptions& opts,
-        std::unique_ptr<FeatureExtractorInterface> base_extractor);
+        std::unique_ptr<FrontendInterface> base_extractor);
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& waves);
     virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
     // noramlize audio, the dim is 1.
@@ -57,15 +57,15 @@ class DecibelNormalizer : public FeatureExtractorInterface {
     bool Compute(kaldi::VectorBase<kaldi::BaseFloat>* waves) const;
     DecibelNormalizerOptions opts_;
     size_t dim_;
-    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
     kaldi::Vector<kaldi::BaseFloat> waveform_;
 };
 
 
-class CMVN : public FeatureExtractorInterface {
+class CMVN : public FrontendInterface {
   public:
     explicit CMVN(std::string cmvn_file,
-                  std::unique_ptr<FeatureExtractorInterface> base_extractor);
+                  std::unique_ptr<FrontendInterface> base_extractor);
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
 
     // the length of feats = feature_row * feature_dim,
@@ -81,7 +81,7 @@ class CMVN : public FeatureExtractorInterface {
     void Compute(kaldi::VectorBase<kaldi::BaseFloat>* feats) const;
     void ApplyCMVN(kaldi::MatrixBase<BaseFloat>* feats);
     kaldi::Matrix<double> stats_;
-    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
     size_t dim_;
     bool var_norm_;
 };
diff --git a/speechx/speechx/nnet/decodable.cc b/speechx/speechx/nnet/decodable.cc
index e6315d07a..542168d24 100644
--- a/speechx/speechx/nnet/decodable.cc
+++ b/speechx/speechx/nnet/decodable.cc
@@ -22,7 +22,7 @@ using std::vector;
 using kaldi::Vector;
 
 Decodable::Decodable(const std::shared_ptr<NnetInterface>& nnet,
-                     const std::shared_ptr<FeatureExtractorInterface>& frontend)
+                     const std::shared_ptr<FrontendInterface>& frontend)
     : frontend_(frontend), nnet_(nnet), frame_offset_(0), frames_ready_(0) {}
 
 void Decodable::Acceptlikelihood(const Matrix<BaseFloat>& likelihood) {
diff --git a/speechx/speechx/nnet/decodable.h b/speechx/speechx/nnet/decodable.h
index 7938b5823..ef17601fa 100644
--- a/speechx/speechx/nnet/decodable.h
+++ b/speechx/speechx/nnet/decodable.h
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "nnet/decodable-itf.h"
 #include "nnet/nnet_interface.h"
@@ -26,7 +26,7 @@ class Decodable : public kaldi::DecodableInterface {
   public:
     explicit Decodable(
         const std::shared_ptr<NnetInterface>& nnet,
-        const std::shared_ptr<FeatureExtractorInterface>& frontend);
+        const std::shared_ptr<FrontendInterface>& frontend);
     // void Init(DecodableOpts config);
     virtual kaldi::BaseFloat LogLikelihood(int32 frame, int32 index);
     virtual bool IsLastFrame(int32 frame) const;
@@ -41,7 +41,7 @@ class Decodable : public kaldi::DecodableInterface {
 
   private:
     bool AdvanceChunk();
-    std::shared_ptr<FeatureExtractorInterface> frontend_;
+    std::shared_ptr<FrontendInterface> frontend_;
     std::shared_ptr<NnetInterface> nnet_;
     kaldi::Matrix<kaldi::BaseFloat> nnet_cache_;
     // std::vector<std::vector<kaldi::BaseFloat>> nnet_cache_;

From 8d66a254dae27dbe32f92921fa18be24326c689c Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Fri, 1 Apr 2022 10:31:08 +0000
Subject: [PATCH 2/5] cmvn and db norm

---
 speechx/speechx/frontend/CMakeLists.txt       |  3 +-
 .../frontend/{normalizer.cc => cmvn.cc}       | 79 +--------------
 speechx/speechx/frontend/cmvn.h               | 34 +++++++
 speechx/speechx/frontend/db_norm.cc           | 95 +++++++++++++++++++
 speechx/speechx/frontend/db_norm.h            | 65 +++++++++++++
 speechx/speechx/frontend/normalizer.h         | 89 +----------------
 6 files changed, 199 insertions(+), 166 deletions(-)
 rename speechx/speechx/frontend/{normalizer.cc => cmvn.cc} (59%)
 create mode 100644 speechx/speechx/frontend/cmvn.h
 create mode 100644 speechx/speechx/frontend/db_norm.cc
 create mode 100644 speechx/speechx/frontend/db_norm.h

diff --git a/speechx/speechx/frontend/CMakeLists.txt b/speechx/speechx/frontend/CMakeLists.txt
index d0ec008ee..35243b6e3 100644
--- a/speechx/speechx/frontend/CMakeLists.txt
+++ b/speechx/speechx/frontend/CMakeLists.txt
@@ -1,7 +1,8 @@
 project(frontend)
 
 add_library(frontend STATIC
-  normalizer.cc
+  cmvn.cc
+  db_norm.cc
   linear_spectrogram.cc
   audio_cache.cc
   feature_cache.cc
diff --git a/speechx/speechx/frontend/normalizer.cc b/speechx/speechx/frontend/cmvn.cc
similarity index 59%
rename from speechx/speechx/frontend/normalizer.cc
rename to speechx/speechx/frontend/cmvn.cc
index 26f11b692..d9bba9435 100644
--- a/speechx/speechx/frontend/normalizer.cc
+++ b/speechx/speechx/frontend/cmvn.cc
@@ -1,17 +1,3 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
 
 #include "frontend/normalizer.h"
 #include "kaldi/feat/cmvn.h"
@@ -26,70 +12,7 @@ using std::vector;
 using kaldi::SubVector;
 using std::unique_ptr;
 
-DecibelNormalizer::DecibelNormalizer(
-    const DecibelNormalizerOptions& opts,
-    std::unique_ptr<FrontendInterface> base_extractor) {
-    base_extractor_ = std::move(base_extractor);
-    opts_ = opts;
-    dim_ = 1;
-}
-
-void DecibelNormalizer::Accept(const kaldi::VectorBase<BaseFloat>& waves) {
-    base_extractor_->Accept(waves);
-}
-
-bool DecibelNormalizer::Read(kaldi::Vector<BaseFloat>* waves) {
-    if (base_extractor_->Read(waves) == false || waves->Dim() == 0) {
-        return false;
-    }
-    Compute(waves);
-    return true;
-}
-
-bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
-    // calculate db rms
-    BaseFloat rms_db = 0.0;
-    BaseFloat mean_square = 0.0;
-    BaseFloat gain = 0.0;
-    BaseFloat wave_float_normlization = 1.0f / (std::pow(2, 16 - 1));
-
-    vector<BaseFloat> samples;
-    samples.resize(waves->Dim());
-    for (size_t i = 0; i < samples.size(); ++i) {
-        samples[i] = (*waves)(i);
-    }
-
-    // square
-    for (auto& d : samples) {
-        if (opts_.convert_int_float) {
-            d = d * wave_float_normlization;
-        }
-        mean_square += d * d;
-    }
-
-    // mean
-    mean_square /= samples.size();
-    rms_db = 10 * std::log10(mean_square);
-    gain = opts_.target_db - rms_db;
-
-    if (gain > opts_.max_gain_db) {
-        LOG(ERROR)
-            << "Unable to normalize segment to " << opts_.target_db << "dB,"
-            << "because the the probable gain have exceeds opts_.max_gain_db"
-            << opts_.max_gain_db << "dB.";
-        return false;
-    }
-
-    // Note that this is an in-place transformation.
-    for (auto& item : samples) {
-        // python item *= 10.0 ** (gain / 20.0)
-        item *= std::pow(10.0, gain / 20.0);
-    }
 
-    std::memcpy(
-        waves->Data(), samples.data(), sizeof(BaseFloat) * samples.size());
-    return true;
-}
 
 CMVN::CMVN(std::string cmvn_file,
            unique_ptr<FrontendInterface> base_extractor)
@@ -185,4 +108,4 @@ void CMVN::ApplyCMVN(kaldi::MatrixBase<BaseFloat>* feats) {
     ApplyCmvn(stats_, var_norm_, feats);
 }
 
-}  // namespace ppspeech
+}  // namespace ppspeech
\ No newline at end of file
diff --git a/speechx/speechx/frontend/cmvn.h b/speechx/speechx/frontend/cmvn.h
new file mode 100644
index 000000000..fdf2a87a4
--- /dev/null
+++ b/speechx/speechx/frontend/cmvn.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include "base/common.h"
+#include "frontend/frontend_itf.h"
+#include "kaldi/matrix/kaldi-matrix.h"
+#include "kaldi/util/options-itf.h"
+
+namespace ppspeech {
+
+class CMVN : public FrontendInterface {
+  public:
+    explicit CMVN(std::string cmvn_file,
+                  std::unique_ptr<FrontendInterface> base_extractor);
+    virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
+
+    // the length of feats = feature_row * feature_dim,
+    // the Matrix is squashed into Vector
+    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
+    // the dim_ is the feature dim.
+    virtual size_t Dim() const { return dim_; }
+    virtual void SetFinished() { base_extractor_->SetFinished(); }
+    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
+    virtual void Reset() { base_extractor_->Reset(); }
+
+  private:
+    void Compute(kaldi::VectorBase<kaldi::BaseFloat>* feats) const;
+    void ApplyCMVN(kaldi::MatrixBase<BaseFloat>* feats);
+    kaldi::Matrix<double> stats_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
+    size_t dim_;
+    bool var_norm_;
+};
+
+}  // namespace ppspeech
\ No newline at end of file
diff --git a/speechx/speechx/frontend/db_norm.cc b/speechx/speechx/frontend/db_norm.cc
new file mode 100644
index 000000000..830af13be
--- /dev/null
+++ b/speechx/speechx/frontend/db_norm.cc
@@ -0,0 +1,95 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "frontend/normalizer.h"
+#include "kaldi/feat/cmvn.h"
+#include "kaldi/util/kaldi-io.h"
+
+namespace ppspeech {
+
+using kaldi::Vector;
+using kaldi::VectorBase;
+using kaldi::BaseFloat;
+using std::vector;
+using kaldi::SubVector;
+using std::unique_ptr;
+
+DecibelNormalizer::DecibelNormalizer(
+    const DecibelNormalizerOptions& opts,
+    std::unique_ptr<FrontendInterface> base_extractor) {
+    base_extractor_ = std::move(base_extractor);
+    opts_ = opts;
+    dim_ = 1;
+}
+
+void DecibelNormalizer::Accept(const kaldi::VectorBase<BaseFloat>& waves) {
+    base_extractor_->Accept(waves);
+}
+
+bool DecibelNormalizer::Read(kaldi::Vector<BaseFloat>* waves) {
+    if (base_extractor_->Read(waves) == false || waves->Dim() == 0) {
+        return false;
+    }
+    Compute(waves);
+    return true;
+}
+
+bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
+    // calculate db rms
+    BaseFloat rms_db = 0.0;
+    BaseFloat mean_square = 0.0;
+    BaseFloat gain = 0.0;
+    BaseFloat wave_float_normlization = 1.0f / (std::pow(2, 16 - 1));
+
+    vector<BaseFloat> samples;
+    samples.resize(waves->Dim());
+    for (size_t i = 0; i < samples.size(); ++i) {
+        samples[i] = (*waves)(i);
+    }
+
+    // square
+    for (auto& d : samples) {
+        if (opts_.convert_int_float) {
+            d = d * wave_float_normlization;
+        }
+        mean_square += d * d;
+    }
+
+    // mean
+    mean_square /= samples.size();
+    rms_db = 10 * std::log10(mean_square);
+    gain = opts_.target_db - rms_db;
+
+    if (gain > opts_.max_gain_db) {
+        LOG(ERROR)
+            << "Unable to normalize segment to " << opts_.target_db << "dB,"
+            << "because the the probable gain have exceeds opts_.max_gain_db"
+            << opts_.max_gain_db << "dB.";
+        return false;
+    }
+
+    // Note that this is an in-place transformation.
+    for (auto& item : samples) {
+        // python item *= 10.0 ** (gain / 20.0)
+        item *= std::pow(10.0, gain / 20.0);
+    }
+
+    std::memcpy(
+        waves->Data(), samples.data(), sizeof(BaseFloat) * samples.size());
+    return true;
+}
+
+
+}  // namespace ppspeech
diff --git a/speechx/speechx/frontend/db_norm.h b/speechx/speechx/frontend/db_norm.h
new file mode 100644
index 000000000..3d3710715
--- /dev/null
+++ b/speechx/speechx/frontend/db_norm.h
@@ -0,0 +1,65 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#pragma once
+
+#include "base/common.h"
+#include "frontend/frontend_itf.h"
+#include "kaldi/matrix/kaldi-matrix.h"
+#include "kaldi/util/options-itf.h"
+
+namespace ppspeech {
+
+struct DecibelNormalizerOptions {
+    float target_db;
+    float max_gain_db;
+    bool convert_int_float;
+    DecibelNormalizerOptions()
+        : target_db(-20), max_gain_db(300.0), convert_int_float(false) {}
+
+    void Register(kaldi::OptionsItf* opts) {
+        opts->Register(
+            "target-db", &target_db, "target db for db normalization");
+        opts->Register(
+            "max-gain-db", &max_gain_db, "max gain db for db normalization");
+        opts->Register("convert-int-float",
+                       &convert_int_float,
+                       "if convert int samples to float");
+    }
+};
+
+class DecibelNormalizer : public FrontendInterface {
+  public:
+    explicit DecibelNormalizer(
+        const DecibelNormalizerOptions& opts,
+        std::unique_ptr<FrontendInterface> base_extractor);
+    virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& waves);
+    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
+    // normalize audio, the dim is 1.
+    virtual size_t Dim() const { return dim_; }
+    virtual void SetFinished() { base_extractor_->SetFinished(); }
+    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
+    virtual void Reset() { base_extractor_->Reset(); }
+
+  private:
+    bool Compute(kaldi::VectorBase<kaldi::BaseFloat>* waves) const;
+    DecibelNormalizerOptions opts_;
+    size_t dim_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
+    kaldi::Vector<kaldi::BaseFloat> waveform_;
+};
+
+
+}  // namespace ppspeech
\ No newline at end of file
diff --git a/speechx/speechx/frontend/normalizer.h b/speechx/speechx/frontend/normalizer.h
index df1819612..89599c2a6 100644
--- a/speechx/speechx/frontend/normalizer.h
+++ b/speechx/speechx/frontend/normalizer.h
@@ -1,89 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
 #pragma once
 
-#include "base/common.h"
-#include "frontend/frontend_itf.h"
-#include "kaldi/matrix/kaldi-matrix.h"
-#include "kaldi/util/options-itf.h"
-
-namespace ppspeech {
-
-struct DecibelNormalizerOptions {
-    float target_db;
-    float max_gain_db;
-    bool convert_int_float;
-    DecibelNormalizerOptions()
-        : target_db(-20), max_gain_db(300.0), convert_int_float(false) {}
-
-    void Register(kaldi::OptionsItf* opts) {
-        opts->Register(
-            "target-db", &target_db, "target db for db normalization");
-        opts->Register(
-            "max-gain-db", &max_gain_db, "max gain db for db normalization");
-        opts->Register("convert-int-float",
-                       &convert_int_float,
-                       "if convert int samples to float");
-    }
-};
-
-class DecibelNormalizer : public FrontendInterface {
-  public:
-    explicit DecibelNormalizer(
-        const DecibelNormalizerOptions& opts,
-        std::unique_ptr<FrontendInterface> base_extractor);
-    virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& waves);
-    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
-    // noramlize audio, the dim is 1.
-    virtual size_t Dim() const { return dim_; }
-    virtual void SetFinished() { base_extractor_->SetFinished(); }
-    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
-    virtual void Reset() { base_extractor_->Reset(); }
-
-  private:
-    bool Compute(kaldi::VectorBase<kaldi::BaseFloat>* waves) const;
-    DecibelNormalizerOptions opts_;
-    size_t dim_;
-    std::unique_ptr<FrontendInterface> base_extractor_;
-    kaldi::Vector<kaldi::BaseFloat> waveform_;
-};
-
-
-class CMVN : public FrontendInterface {
-  public:
-    explicit CMVN(std::string cmvn_file,
-                  std::unique_ptr<FrontendInterface> base_extractor);
-    virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
-
-    // the length of feats = feature_row * feature_dim,
-    // the Matrix is squashed into Vector
-    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
-    // the dim_ is the feautre dim.
-    virtual size_t Dim() const { return dim_; }
-    virtual void SetFinished() { base_extractor_->SetFinished(); }
-    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
-    virtual void Reset() { base_extractor_->Reset(); }
-
-  private:
-    void Compute(kaldi::VectorBase<kaldi::BaseFloat>* feats) const;
-    void ApplyCMVN(kaldi::MatrixBase<BaseFloat>* feats);
-    kaldi::Matrix<double> stats_;
-    std::unique_ptr<FrontendInterface> base_extractor_;
-    size_t dim_;
-    bool var_norm_;
-};
-
-}  // namespace ppspeech
\ No newline at end of file
+#include "frontend/cmvn.h"
+#include "frontend/db_norm.h"
\ No newline at end of file

From 42c7537ce629a7fc717d27e5fc36073d86f8fce6 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Fri, 1 Apr 2022 10:43:39 +0000
Subject: [PATCH 3/5] frontend to audio dir

---
 speechx/examples/decoder/offline_decoder_main.cc  |  2 +-
 .../decoder/offline_decoder_sliding_chunk_main.cc |  2 +-
 speechx/examples/feat/linear_spectrogram_main.cc  | 13 ++++++-------
 speechx/speechx/frontend/CMakeLists.txt           | 11 +----------
 speechx/speechx/frontend/audio/CMakeLists.txt     | 11 +++++++++++
 .../speechx/frontend/{ => audio}/audio_cache.cc   |  2 +-
 .../speechx/frontend/{ => audio}/audio_cache.h    |  2 +-
 speechx/speechx/frontend/{ => audio}/cmvn.cc      |  3 +--
 speechx/speechx/frontend/{ => audio}/cmvn.h       |  2 +-
 speechx/speechx/frontend/{ => audio}/data_cache.h |  2 +-
 speechx/speechx/frontend/{ => audio}/db_norm.cc   |  2 +-
 speechx/speechx/frontend/{ => audio}/db_norm.h    |  2 +-
 speechx/speechx/frontend/{ => audio}/fbank.h      |  0
 .../speechx/frontend/{ => audio}/feature_cache.cc |  2 +-
 .../speechx/frontend/{ => audio}/feature_cache.h  |  2 +-
 .../speechx/frontend/{ => audio}/frontend_itf.h   |  0
 .../frontend/{ => audio}/linear_spectrogram.cc    |  2 +-
 .../frontend/{ => audio}/linear_spectrogram.h     |  2 +-
 speechx/speechx/frontend/{ => audio}/mfcc.h       |  0
 speechx/speechx/frontend/audio/normalizer.h       |  4 ++++
 speechx/speechx/frontend/normalizer.h             |  4 ----
 speechx/speechx/frontend/window.h                 | 15 ---------------
 speechx/speechx/nnet/decodable.h                  |  2 +-
 23 files changed, 36 insertions(+), 51 deletions(-)
 rename speechx/speechx/frontend/{ => audio}/audio_cache.cc (98%)
 rename speechx/speechx/frontend/{ => audio}/audio_cache.h (97%)
 rename speechx/speechx/frontend/{ => audio}/cmvn.cc (98%)
 rename speechx/speechx/frontend/{ => audio}/cmvn.h (94%)
 rename speechx/speechx/frontend/{ => audio}/data_cache.h (97%)
 rename speechx/speechx/frontend/{ => audio}/db_norm.cc (98%)
 rename speechx/speechx/frontend/{ => audio}/db_norm.h (97%)
 rename speechx/speechx/frontend/{ => audio}/fbank.h (100%)
 rename speechx/speechx/frontend/{ => audio}/feature_cache.cc (97%)
 rename speechx/speechx/frontend/{ => audio}/feature_cache.h (97%)
 rename speechx/speechx/frontend/{ => audio}/frontend_itf.h (100%)
 rename speechx/speechx/frontend/{ => audio}/linear_spectrogram.cc (98%)
 rename speechx/speechx/frontend/{ => audio}/linear_spectrogram.h (97%)
 rename speechx/speechx/frontend/{ => audio}/mfcc.h (100%)
 create mode 100644 speechx/speechx/frontend/audio/normalizer.h
 delete mode 100644 speechx/speechx/frontend/normalizer.h
 delete mode 100644 speechx/speechx/frontend/window.h

diff --git a/speechx/examples/decoder/offline_decoder_main.cc b/speechx/examples/decoder/offline_decoder_main.cc
index 6bd83b9b1..9a9c14a0c 100644
--- a/speechx/examples/decoder/offline_decoder_main.cc
+++ b/speechx/examples/decoder/offline_decoder_main.cc
@@ -17,7 +17,7 @@
 #include "base/flags.h"
 #include "base/log.h"
 #include "decoder/ctc_beam_search_decoder.h"
-#include "frontend/data_cache.h"
+#include "frontend/audio/data_cache.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
 #include "nnet/paddle_nnet.h"
diff --git a/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc b/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
index 4d5ffe145..7f6c572ca 100644
--- a/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
+++ b/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
@@ -17,7 +17,7 @@
 #include "base/flags.h"
 #include "base/log.h"
 #include "decoder/ctc_beam_search_decoder.h"
-#include "frontend/data_cache.h"
+#include "frontend/audio/data_cache.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
 #include "nnet/paddle_nnet.h"
diff --git a/speechx/examples/feat/linear_spectrogram_main.cc b/speechx/examples/feat/linear_spectrogram_main.cc
index c29d2b21f..8f32bac2a 100644
--- a/speechx/examples/feat/linear_spectrogram_main.cc
+++ b/speechx/examples/feat/linear_spectrogram_main.cc
@@ -14,19 +14,18 @@
 
 // todo refactor, repalce with gtest
 
-#include "frontend/linear_spectrogram.h"
 #include "base/flags.h"
 #include "base/log.h"
-#include "frontend/audio_cache.h"
-#include "frontend/data_cache.h"
-#include "frontend/feature_cache.h"
-#include "frontend/frontend_itf.h"
-#include "frontend/normalizer.h"
 #include "kaldi/feat/wave-reader.h"
 #include "kaldi/util/kaldi-io.h"
 #include "kaldi/util/table-types.h"
 
-#include <glog/logging.h>
+#include "frontend/audio/linear_spectrogram.h"
+#include "frontend/audio/audio_cache.h"
+#include "frontend/audio/data_cache.h"
+#include "frontend/audio/feature_cache.h"
+#include "frontend/audio/frontend_itf.h"
+#include "frontend/audio/normalizer.h"
 
 DEFINE_string(wav_rspecifier, "", "test wav scp path");
 DEFINE_string(feature_wspecifier, "", "output feats wspecifier");
diff --git a/speechx/speechx/frontend/CMakeLists.txt b/speechx/speechx/frontend/CMakeLists.txt
index 35243b6e3..7d10fdec9 100644
--- a/speechx/speechx/frontend/CMakeLists.txt
+++ b/speechx/speechx/frontend/CMakeLists.txt
@@ -1,11 +1,2 @@
-project(frontend)
 
-add_library(frontend STATIC
-  cmvn.cc
-  db_norm.cc
-  linear_spectrogram.cc
-  audio_cache.cc
-  feature_cache.cc
-)
-
-target_link_libraries(frontend PUBLIC kaldi-matrix)
\ No newline at end of file
+add_subdirectory(audio)
\ No newline at end of file
diff --git a/speechx/speechx/frontend/audio/CMakeLists.txt b/speechx/speechx/frontend/audio/CMakeLists.txt
index e69de29bb..35243b6e3 100644
--- a/speechx/speechx/frontend/audio/CMakeLists.txt
+++ b/speechx/speechx/frontend/audio/CMakeLists.txt
@@ -0,0 +1,11 @@
+project(frontend)
+
+add_library(frontend STATIC
+  cmvn.cc
+  db_norm.cc
+  linear_spectrogram.cc
+  audio_cache.cc
+  feature_cache.cc
+)
+
+target_link_libraries(frontend PUBLIC kaldi-matrix)
\ No newline at end of file
diff --git a/speechx/speechx/frontend/audio_cache.cc b/speechx/speechx/frontend/audio/audio_cache.cc
similarity index 98%
rename from speechx/speechx/frontend/audio_cache.cc
rename to speechx/speechx/frontend/audio/audio_cache.cc
index d44ed592c..c3233e595 100644
--- a/speechx/speechx/frontend/audio_cache.cc
+++ b/speechx/speechx/frontend/audio/audio_cache.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "frontend/audio_cache.h"
+#include "frontend/audio/audio_cache.h"
 #include "kaldi/base/timer.h"
 
 namespace ppspeech {
diff --git a/speechx/speechx/frontend/audio_cache.h b/speechx/speechx/frontend/audio/audio_cache.h
similarity index 97%
rename from speechx/speechx/frontend/audio_cache.h
rename to speechx/speechx/frontend/audio/audio_cache.h
index f48da12b7..17e1a8389 100644
--- a/speechx/speechx/frontend/audio_cache.h
+++ b/speechx/speechx/frontend/audio/audio_cache.h
@@ -16,7 +16,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 
 namespace ppspeech {
 
diff --git a/speechx/speechx/frontend/cmvn.cc b/speechx/speechx/frontend/audio/cmvn.cc
similarity index 98%
rename from speechx/speechx/frontend/cmvn.cc
rename to speechx/speechx/frontend/audio/cmvn.cc
index d9bba9435..706492b7c 100644
--- a/speechx/speechx/frontend/cmvn.cc
+++ b/speechx/speechx/frontend/audio/cmvn.cc
@@ -1,5 +1,5 @@
 
-#include "frontend/normalizer.h"
+#include "frontend/audio/cmvn.h"
 #include "kaldi/feat/cmvn.h"
 #include "kaldi/util/kaldi-io.h"
 
@@ -13,7 +13,6 @@ using kaldi::SubVector;
 using std::unique_ptr;
 
 
-
 CMVN::CMVN(std::string cmvn_file,
            unique_ptr<FrontendInterface> base_extractor)
     : var_norm_(true) {
diff --git a/speechx/speechx/frontend/cmvn.h b/speechx/speechx/frontend/audio/cmvn.h
similarity index 94%
rename from speechx/speechx/frontend/cmvn.h
rename to speechx/speechx/frontend/audio/cmvn.h
index fdf2a87a4..b3cfbb11a 100644
--- a/speechx/speechx/frontend/cmvn.h
+++ b/speechx/speechx/frontend/audio/cmvn.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/options-itf.h"
 
diff --git a/speechx/speechx/frontend/data_cache.h b/speechx/speechx/frontend/audio/data_cache.h
similarity index 97%
rename from speechx/speechx/frontend/data_cache.h
rename to speechx/speechx/frontend/audio/data_cache.h
index b8ce6bf65..a812278ce 100644
--- a/speechx/speechx/frontend/data_cache.h
+++ b/speechx/speechx/frontend/audio/data_cache.h
@@ -17,7 +17,7 @@
 
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 
 
 namespace ppspeech {
diff --git a/speechx/speechx/frontend/db_norm.cc b/speechx/speechx/frontend/audio/db_norm.cc
similarity index 98%
rename from speechx/speechx/frontend/db_norm.cc
rename to speechx/speechx/frontend/audio/db_norm.cc
index 830af13be..931e932d6 100644
--- a/speechx/speechx/frontend/db_norm.cc
+++ b/speechx/speechx/frontend/audio/db_norm.cc
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 
-#include "frontend/normalizer.h"
+#include "frontend/audio/db_norm.h"
 #include "kaldi/feat/cmvn.h"
 #include "kaldi/util/kaldi-io.h"
 
diff --git a/speechx/speechx/frontend/db_norm.h b/speechx/speechx/frontend/audio/db_norm.h
similarity index 97%
rename from speechx/speechx/frontend/db_norm.h
rename to speechx/speechx/frontend/audio/db_norm.h
index 3d3710715..425971437 100644
--- a/speechx/speechx/frontend/db_norm.h
+++ b/speechx/speechx/frontend/audio/db_norm.h
@@ -16,7 +16,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/options-itf.h"
 
diff --git a/speechx/speechx/frontend/fbank.h b/speechx/speechx/frontend/audio/fbank.h
similarity index 100%
rename from speechx/speechx/frontend/fbank.h
rename to speechx/speechx/frontend/audio/fbank.h
diff --git a/speechx/speechx/frontend/feature_cache.cc b/speechx/speechx/frontend/audio/feature_cache.cc
similarity index 97%
rename from speechx/speechx/frontend/feature_cache.cc
rename to speechx/speechx/frontend/audio/feature_cache.cc
index 53b7076d5..d7bea61ad 100644
--- a/speechx/speechx/frontend/feature_cache.cc
+++ b/speechx/speechx/frontend/audio/feature_cache.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "frontend/feature_cache.h"
+#include "frontend/audio/feature_cache.h"
 
 namespace ppspeech {
 
diff --git a/speechx/speechx/frontend/feature_cache.h b/speechx/speechx/frontend/audio/feature_cache.h
similarity index 97%
rename from speechx/speechx/frontend/feature_cache.h
rename to speechx/speechx/frontend/audio/feature_cache.h
index 1281ec35a..99961b5e2 100644
--- a/speechx/speechx/frontend/feature_cache.h
+++ b/speechx/speechx/frontend/audio/feature_cache.h
@@ -15,7 +15,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 
 namespace ppspeech {
 
diff --git a/speechx/speechx/frontend/frontend_itf.h b/speechx/speechx/frontend/audio/frontend_itf.h
similarity index 100%
rename from speechx/speechx/frontend/frontend_itf.h
rename to speechx/speechx/frontend/audio/frontend_itf.h
diff --git a/speechx/speechx/frontend/linear_spectrogram.cc b/speechx/speechx/frontend/audio/linear_spectrogram.cc
similarity index 98%
rename from speechx/speechx/frontend/linear_spectrogram.cc
rename to speechx/speechx/frontend/audio/linear_spectrogram.cc
index 2ba00785a..827b8eccf 100644
--- a/speechx/speechx/frontend/linear_spectrogram.cc
+++ b/speechx/speechx/frontend/audio/linear_spectrogram.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "frontend/linear_spectrogram.h"
+#include "frontend/audio/linear_spectrogram.h"
 #include "kaldi/base/kaldi-math.h"
 #include "kaldi/matrix/matrix-functions.h"
 
diff --git a/speechx/speechx/frontend/linear_spectrogram.h b/speechx/speechx/frontend/audio/linear_spectrogram.h
similarity index 97%
rename from speechx/speechx/frontend/linear_spectrogram.h
rename to speechx/speechx/frontend/audio/linear_spectrogram.h
index 136441efe..bbf8d6853 100644
--- a/speechx/speechx/frontend/linear_spectrogram.h
+++ b/speechx/speechx/frontend/audio/linear_spectrogram.h
@@ -16,7 +16,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 #include "kaldi/feat/feature-window.h"
 
 namespace ppspeech {
diff --git a/speechx/speechx/frontend/mfcc.h b/speechx/speechx/frontend/audio/mfcc.h
similarity index 100%
rename from speechx/speechx/frontend/mfcc.h
rename to speechx/speechx/frontend/audio/mfcc.h
diff --git a/speechx/speechx/frontend/audio/normalizer.h b/speechx/speechx/frontend/audio/normalizer.h
new file mode 100644
index 000000000..df9e4b751
--- /dev/null
+++ b/speechx/speechx/frontend/audio/normalizer.h
@@ -0,0 +1,4 @@
+#pragma once
+
+#include "frontend/audio/cmvn.h"
+#include "frontend/audio/db_norm.h"
\ No newline at end of file
diff --git a/speechx/speechx/frontend/normalizer.h b/speechx/speechx/frontend/normalizer.h
deleted file mode 100644
index 89599c2a6..000000000
--- a/speechx/speechx/frontend/normalizer.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#pragma once
-
-#include "frontend/cmvn.h"
-#include "frontend/db_norm.h"
\ No newline at end of file
diff --git a/speechx/speechx/frontend/window.h b/speechx/speechx/frontend/window.h
deleted file mode 100644
index 70d6307ec..000000000
--- a/speechx/speechx/frontend/window.h
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// extract the window of kaldi feat.
diff --git a/speechx/speechx/nnet/decodable.h b/speechx/speechx/nnet/decodable.h
index ef17601fa..c75a0f4de 100644
--- a/speechx/speechx/nnet/decodable.h
+++ b/speechx/speechx/nnet/decodable.h
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "nnet/decodable-itf.h"
 #include "nnet/nnet_interface.h"

From 9071b9597de42bccbd34202d664ee834fb5fc34b Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Fri, 1 Apr 2022 10:49:03 +0000
Subject: [PATCH 4/5] format code

---
 .../examples/feat/linear_spectrogram_main.cc    |  7 +++----
 speechx/speechx/frontend/audio/cmvn.cc          | 17 +++++++++++++++--
 speechx/speechx/frontend/audio/cmvn.h           | 14 ++++++++++++++
 speechx/speechx/frontend/audio/feature_cache.cc |  4 ++--
 speechx/speechx/frontend/audio/normalizer.h     | 14 ++++++++++++++
 speechx/speechx/nnet/decodable.h                |  5 ++---
 6 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/speechx/examples/feat/linear_spectrogram_main.cc b/speechx/examples/feat/linear_spectrogram_main.cc
index 8f32bac2a..ca76d85c7 100644
--- a/speechx/examples/feat/linear_spectrogram_main.cc
+++ b/speechx/examples/feat/linear_spectrogram_main.cc
@@ -20,11 +20,11 @@
 #include "kaldi/util/kaldi-io.h"
 #include "kaldi/util/table-types.h"
 
-#include "frontend/audio/linear_spectrogram.h"
 #include "frontend/audio/audio_cache.h"
 #include "frontend/audio/data_cache.h"
 #include "frontend/audio/feature_cache.h"
 #include "frontend/audio/frontend_itf.h"
+#include "frontend/audio/linear_spectrogram.h"
 #include "frontend/audio/normalizer.h"
 
 DEFINE_string(wav_rspecifier, "", "test wav scp path");
@@ -187,9 +187,8 @@ int main(int argc, char* argv[]) {
     std::unique_ptr<ppspeech::FrontendInterface> linear_spectrogram(
         new ppspeech::LinearSpectrogram(opt, std::move(db_norm)));
 
-    std::unique_ptr<ppspeech::FrontendInterface> cmvn(
-        new ppspeech::CMVN(FLAGS_cmvn_write_path,
-                           std::move(linear_spectrogram)));
+    std::unique_ptr<ppspeech::FrontendInterface> cmvn(new ppspeech::CMVN(
+        FLAGS_cmvn_write_path, std::move(linear_spectrogram)));
 
     ppspeech::FeatureCache feature_cache(kint16max, std::move(cmvn));
     LOG(INFO) << "feat dim: " << feature_cache.Dim();
diff --git a/speechx/speechx/frontend/audio/cmvn.cc b/speechx/speechx/frontend/audio/cmvn.cc
index 706492b7c..4c1ffd6a1 100644
--- a/speechx/speechx/frontend/audio/cmvn.cc
+++ b/speechx/speechx/frontend/audio/cmvn.cc
@@ -1,3 +1,17 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 
 #include "frontend/audio/cmvn.h"
 #include "kaldi/feat/cmvn.h"
@@ -13,8 +27,7 @@ using kaldi::SubVector;
 using std::unique_ptr;
 
 
-CMVN::CMVN(std::string cmvn_file,
-           unique_ptr<FrontendInterface> base_extractor)
+CMVN::CMVN(std::string cmvn_file, unique_ptr<FrontendInterface> base_extractor)
     : var_norm_(true) {
     base_extractor_ = std::move(base_extractor);
     bool binary;
diff --git a/speechx/speechx/frontend/audio/cmvn.h b/speechx/speechx/frontend/audio/cmvn.h
index b3cfbb11a..50ef5649b 100644
--- a/speechx/speechx/frontend/audio/cmvn.h
+++ b/speechx/speechx/frontend/audio/cmvn.h
@@ -1,3 +1,17 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #pragma once
 
 #include "base/common.h"
diff --git a/speechx/speechx/frontend/audio/feature_cache.cc b/speechx/speechx/frontend/audio/feature_cache.cc
index d7bea61ad..3f7f6502b 100644
--- a/speechx/speechx/frontend/audio/feature_cache.cc
+++ b/speechx/speechx/frontend/audio/feature_cache.cc
@@ -23,8 +23,8 @@ using std::vector;
 using kaldi::SubVector;
 using std::unique_ptr;
 
-FeatureCache::FeatureCache(
-    int max_size, unique_ptr<FrontendInterface> base_extractor) {
+FeatureCache::FeatureCache(int max_size,
+                           unique_ptr<FrontendInterface> base_extractor) {
     max_size_ = max_size;
     base_extractor_ = std::move(base_extractor);
 }
diff --git a/speechx/speechx/frontend/audio/normalizer.h b/speechx/speechx/frontend/audio/normalizer.h
index df9e4b751..dcf721dd2 100644
--- a/speechx/speechx/frontend/audio/normalizer.h
+++ b/speechx/speechx/frontend/audio/normalizer.h
@@ -1,3 +1,17 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #pragma once
 
 #include "frontend/audio/cmvn.h"
diff --git a/speechx/speechx/nnet/decodable.h b/speechx/speechx/nnet/decodable.h
index c75a0f4de..3f0aab047 100644
--- a/speechx/speechx/nnet/decodable.h
+++ b/speechx/speechx/nnet/decodable.h
@@ -24,9 +24,8 @@ struct DecodableOpts;
 
 class Decodable : public kaldi::DecodableInterface {
   public:
-    explicit Decodable(
-        const std::shared_ptr<NnetInterface>& nnet,
-        const std::shared_ptr<FrontendInterface>& frontend);
+    explicit Decodable(const std::shared_ptr<NnetInterface>& nnet,
+                       const std::shared_ptr<FrontendInterface>& frontend);
     // void Init(DecodableOpts config);
     virtual kaldi::BaseFloat LogLikelihood(int32 frame, int32 index);
     virtual bool IsLastFrame(int32 frame) const;

From f83ec41161ef3ef8969495f34587ca6870f7cc79 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Fri, 1 Apr 2022 11:11:08 +0000
Subject: [PATCH 5/5] rename nnet itf

---
 speechx/speechx/nnet/{nnet_interface.h => nnet_itf.h} | 0
 speechx/speechx/nnet/paddle_nnet.h                    | 7 ++++---
 2 files changed, 4 insertions(+), 3 deletions(-)
 rename speechx/speechx/nnet/{nnet_interface.h => nnet_itf.h} (100%)

diff --git a/speechx/speechx/nnet/nnet_interface.h b/speechx/speechx/nnet/nnet_itf.h
similarity index 100%
rename from speechx/speechx/nnet/nnet_interface.h
rename to speechx/speechx/nnet/nnet_itf.h
diff --git a/speechx/speechx/nnet/paddle_nnet.h b/speechx/speechx/nnet/paddle_nnet.h
index 30fbac9f1..906994d06 100644
--- a/speechx/speechx/nnet/paddle_nnet.h
+++ b/speechx/speechx/nnet/paddle_nnet.h
@@ -15,13 +15,14 @@
 
 #pragma once
 
-#include "base/common.h"
-#include "nnet/nnet_interface.h"
-#include "paddle_inference_api.h"
 
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/options-itf.h"
 
+#include "base/common.h"
+#include "nnet/nnet_itf.h"
+#include "paddle_inference_api.h"
+
 #include <numeric>
 
 namespace ppspeech {