From a9f4ce47a34bbd62c88090ef9a6e3498dbfc669a Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Fri, 1 Apr 2022 10:24:16 +0000
Subject: [PATCH] frontend: rename FeatureExtractorInterface to FrontendInterface

---
 speechx/examples/feat/linear_spectrogram_main.cc   | 12 ++++++------
 speechx/speechx/frontend/audio_cache.h             |  4 ++--
 speechx/speechx/frontend/data_cache.h              |  4 ++--
 speechx/speechx/frontend/fbank.h                   |  4 ++--
 speechx/speechx/frontend/feature_cache.cc          |  2 +-
 speechx/speechx/frontend/feature_cache.h           |  8 ++++----
 .../frontend/feature_extractor_controller.h        | 13 -------------
 .../frontend/feature_extractor_controller_impl.h   | 13 -------------
 ...eature_extractor_interface.h => frontend_itf.h} |  2 +-
 speechx/speechx/frontend/linear_spectrogram.cc     |  2 +-
 speechx/speechx/frontend/linear_spectrogram.h      |  8 ++++----
 speechx/speechx/frontend/normalizer.cc             |  4 ++--
 speechx/speechx/frontend/normalizer.h              | 14 +++++++-------
 speechx/speechx/nnet/decodable.cc                  |  2 +-
 speechx/speechx/nnet/decodable.h                   |  6 +++---
 15 files changed, 36 insertions(+), 62 deletions(-)
 delete mode 100644 speechx/speechx/frontend/feature_extractor_controller.h
 delete mode 100644 speechx/speechx/frontend/feature_extractor_controller_impl.h
 rename speechx/speechx/frontend/{feature_extractor_interface.h => frontend_itf.h} (97%)

diff --git a/speechx/examples/feat/linear_spectrogram_main.cc b/speechx/examples/feat/linear_spectrogram_main.cc
index e1f0a895..c29d2b21 100644
--- a/speechx/examples/feat/linear_spectrogram_main.cc
+++ b/speechx/examples/feat/linear_spectrogram_main.cc
@@ -20,7 +20,7 @@
 #include "frontend/audio_cache.h"
 #include "frontend/data_cache.h"
 #include "frontend/feature_cache.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 #include "frontend/normalizer.h"
 #include "kaldi/feat/wave-reader.h"
 #include "kaldi/util/kaldi-io.h"
@@ -170,13 +170,13 @@ int main(int argc, char* argv[]) {
     // feature pipeline: wave cache --> decibel_normalizer --> hanning
     // window -->linear_spectrogram --> global cmvn -> feat cache
 
-    // std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(new
+    // std::unique_ptr<ppspeech::FrontendInterface> data_source(new
     // ppspeech::DataCache());
-    std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(
+    std::unique_ptr<ppspeech::FrontendInterface> data_source(
         new ppspeech::AudioCache());
 
     ppspeech::DecibelNormalizerOptions db_norm_opt;
-    std::unique_ptr<ppspeech::FeatureExtractorInterface> db_norm(
+    std::unique_ptr<ppspeech::FrontendInterface> db_norm(
         new ppspeech::DecibelNormalizer(db_norm_opt, std::move(data_source)));
 
     ppspeech::LinearSpectrogramOptions opt;
@@ -185,10 +185,10 @@ int main(int argc, char* argv[]) {
     LOG(INFO) << "frame length (ms): " << opt.frame_opts.frame_length_ms;
     LOG(INFO) << "frame shift (ms): " << opt.frame_opts.frame_shift_ms;
 
-    std::unique_ptr<ppspeech::FeatureExtractorInterface> linear_spectrogram(
+    std::unique_ptr<ppspeech::FrontendInterface> linear_spectrogram(
         new ppspeech::LinearSpectrogram(opt, std::move(db_norm)));
 
-    std::unique_ptr<ppspeech::FeatureExtractorInterface> cmvn(
+    std::unique_ptr<ppspeech::FrontendInterface> cmvn(
         new ppspeech::CMVN(FLAGS_cmvn_write_path,
                            std::move(linear_spectrogram)));
 
diff --git a/speechx/speechx/frontend/audio_cache.h b/speechx/speechx/frontend/audio_cache.h
index b6c82c69..f48da12b 100644
--- a/speechx/speechx/frontend/audio_cache.h
+++ b/speechx/speechx/frontend/audio_cache.h
@@ -16,12 +16,12 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 
 namespace ppspeech {
 
 // waves cache
-class AudioCache : public FeatureExtractorInterface {
+class AudioCache : public FrontendInterface {
   public:
     explicit AudioCache(int buffer_size = kint16max);
 
diff --git a/speechx/speechx/frontend/data_cache.h b/speechx/speechx/frontend/data_cache.h
index dea51d76..b8ce6bf6 100644
--- a/speechx/speechx/frontend/data_cache.h
+++ b/speechx/speechx/frontend/data_cache.h
@@ -17,13 +17,13 @@
 
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 
 
 namespace ppspeech {
 // A data source for testing different frontend module.
 // It accepts waves or feats.
-class DataCache : public FeatureExtractorInterface {
+class DataCache : public FrontendInterface {
   public:
     explicit DataCache() { finished_ = false; }
 
diff --git a/speechx/speechx/frontend/fbank.h b/speechx/speechx/frontend/fbank.h
index 7d9cf422..68267b3d 100644
--- a/speechx/speechx/frontend/fbank.h
+++ b/speechx/speechx/frontend/fbank.h
@@ -20,10 +20,10 @@
 
 namespace ppspeech {
 
-class FbankExtractor : FeatureExtractorInterface {
+class FbankExtractor : FrontendInterface {
   public:
     explicit FbankExtractor(const FbankOptions& opts,
-                            share_ptr<FeatureExtractorInterface> pre_extractor);
+                            share_ptr<FrontendInterface> pre_extractor);
     virtual void AcceptWaveform(
         const kaldi::Vector<kaldi::BaseFloat>& input) = 0;
     virtual void Read(kaldi::Vector<kaldi::BaseFloat>* feat) = 0;
diff --git a/speechx/speechx/frontend/feature_cache.cc b/speechx/speechx/frontend/feature_cache.cc
index dad6907c..53b7076d 100644
--- a/speechx/speechx/frontend/feature_cache.cc
+++ b/speechx/speechx/frontend/feature_cache.cc
@@ -24,7 +24,7 @@ using kaldi::SubVector;
 using std::unique_ptr;
 
 FeatureCache::FeatureCache(
-    int max_size, unique_ptr<FeatureExtractorInterface> base_extractor) {
+    int max_size, unique_ptr<FrontendInterface> base_extractor) {
     max_size_ = max_size;
     base_extractor_ = std::move(base_extractor);
 }
diff --git a/speechx/speechx/frontend/feature_cache.h b/speechx/speechx/frontend/feature_cache.h
index f52b9b0f..1281ec35 100644
--- a/speechx/speechx/frontend/feature_cache.h
+++ b/speechx/speechx/frontend/feature_cache.h
@@ -15,15 +15,15 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 
 namespace ppspeech {
 
-class FeatureCache : public FeatureExtractorInterface {
+class FeatureCache : public FrontendInterface {
   public:
     explicit FeatureCache(
         int32 max_size = kint16max,
-        std::unique_ptr<FeatureExtractorInterface> base_extractor = NULL);
+        std::unique_ptr<FrontendInterface> base_extractor = NULL);
 
     // Feed feats or waves
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
@@ -53,7 +53,7 @@ class FeatureCache : public FeatureExtractorInterface {
     bool Compute();
 
     size_t max_size_;
-    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
 
     std::mutex mutex_;
     std::queue<kaldi::Vector<BaseFloat>> cache_;
diff --git a/speechx/speechx/frontend/feature_extractor_controller.h b/speechx/speechx/frontend/feature_extractor_controller.h
deleted file mode 100644
index 0544a1e2..00000000
--- a/speechx/speechx/frontend/feature_extractor_controller.h
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
diff --git a/speechx/speechx/frontend/feature_extractor_controller_impl.h b/speechx/speechx/frontend/feature_extractor_controller_impl.h
deleted file mode 100644
index 0544a1e2..00000000
--- a/speechx/speechx/frontend/feature_extractor_controller_impl.h
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
diff --git a/speechx/speechx/frontend/feature_extractor_interface.h b/speechx/speechx/frontend/frontend_itf.h
similarity index 97%
rename from speechx/speechx/frontend/feature_extractor_interface.h
rename to speechx/speechx/frontend/frontend_itf.h
index 5da2526b..7913cc7c 100644
--- a/speechx/speechx/frontend/feature_extractor_interface.h
+++ b/speechx/speechx/frontend/frontend_itf.h
@@ -19,7 +19,7 @@
 
 namespace ppspeech {
 
-class FeatureExtractorInterface {
+class FrontendInterface {
   public:
     // Feed inputs: features(2D saved in 1D) or waveforms(1D).
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) = 0;
diff --git a/speechx/speechx/frontend/linear_spectrogram.cc b/speechx/speechx/frontend/linear_spectrogram.cc
index 41bc8743..2ba00785 100644
--- a/speechx/speechx/frontend/linear_spectrogram.cc
+++ b/speechx/speechx/frontend/linear_spectrogram.cc
@@ -27,7 +27,7 @@ using std::vector;
 
 LinearSpectrogram::LinearSpectrogram(
     const LinearSpectrogramOptions& opts,
-    std::unique_ptr<FeatureExtractorInterface> base_extractor) {
+    std::unique_ptr<FrontendInterface> base_extractor) {
     opts_ = opts;
     base_extractor_ = std::move(base_extractor);
     int32 window_size = opts.frame_opts.WindowSize();
diff --git a/speechx/speechx/frontend/linear_spectrogram.h b/speechx/speechx/frontend/linear_spectrogram.h
index 10853904..136441ef 100644
--- a/speechx/speechx/frontend/linear_spectrogram.h
+++ b/speechx/speechx/frontend/linear_spectrogram.h
@@ -16,7 +16,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 #include "kaldi/feat/feature-window.h"
 
 namespace ppspeech {
@@ -35,11 +35,11 @@ struct LinearSpectrogramOptions {
     }
 };
 
-class LinearSpectrogram : public FeatureExtractorInterface {
+class LinearSpectrogram : public FrontendInterface {
   public:
     explicit LinearSpectrogram(
         const LinearSpectrogramOptions& opts,
-        std::unique_ptr<FeatureExtractorInterface> base_extractor);
+        std::unique_ptr<FrontendInterface> base_extractor);
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
     virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
     // the dim_ is the dim of single frame feature
@@ -61,7 +61,7 @@ class LinearSpectrogram : public FeatureExtractorInterface {
     std::vector<kaldi::BaseFloat> hanning_window_;
     kaldi::BaseFloat hanning_window_energy_;
     LinearSpectrogramOptions opts_;
-    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
     int chunk_sample_size_;
     DISALLOW_COPY_AND_ASSIGN(LinearSpectrogram);
 };
diff --git a/speechx/speechx/frontend/normalizer.cc b/speechx/speechx/frontend/normalizer.cc
index 52412561..26f11b69 100644
--- a/speechx/speechx/frontend/normalizer.cc
+++ b/speechx/speechx/frontend/normalizer.cc
@@ -28,7 +28,7 @@ using std::unique_ptr;
 
 DecibelNormalizer::DecibelNormalizer(
     const DecibelNormalizerOptions& opts,
-    std::unique_ptr<FeatureExtractorInterface> base_extractor) {
+    std::unique_ptr<FrontendInterface> base_extractor) {
     base_extractor_ = std::move(base_extractor);
     opts_ = opts;
     dim_ = 1;
@@ -92,7 +92,7 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
 }
 
 CMVN::CMVN(std::string cmvn_file,
-           unique_ptr<FeatureExtractorInterface> base_extractor)
+           unique_ptr<FrontendInterface> base_extractor)
     : var_norm_(true) {
     base_extractor_ = std::move(base_extractor);
     bool binary;
diff --git a/speechx/speechx/frontend/normalizer.h b/speechx/speechx/frontend/normalizer.h
index 352d1e16..df181961 100644
--- a/speechx/speechx/frontend/normalizer.h
+++ b/speechx/speechx/frontend/normalizer.h
@@ -16,7 +16,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/options-itf.h"
 
@@ -40,11 +40,11 @@ struct DecibelNormalizerOptions {
     }
 };
 
-class DecibelNormalizer : public FeatureExtractorInterface {
+class DecibelNormalizer : public FrontendInterface {
   public:
     explicit DecibelNormalizer(
         const DecibelNormalizerOptions& opts,
-        std::unique_ptr<FeatureExtractorInterface> base_extractor);
+        std::unique_ptr<FrontendInterface> base_extractor);
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& waves);
     virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
     // noramlize audio, the dim is 1.
@@ -57,15 +57,15 @@ class DecibelNormalizer : public FeatureExtractorInterface {
     bool Compute(kaldi::VectorBase<kaldi::BaseFloat>* waves) const;
     DecibelNormalizerOptions opts_;
     size_t dim_;
-    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
     kaldi::Vector<kaldi::BaseFloat> waveform_;
 };
 
 
-class CMVN : public FeatureExtractorInterface {
+class CMVN : public FrontendInterface {
   public:
     explicit CMVN(std::string cmvn_file,
-                  std::unique_ptr<FeatureExtractorInterface> base_extractor);
+                  std::unique_ptr<FrontendInterface> base_extractor);
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
 
     // the length of feats = feature_row * feature_dim,
@@ -81,7 +81,7 @@ class CMVN : public FeatureExtractorInterface {
     void Compute(kaldi::VectorBase<kaldi::BaseFloat>* feats) const;
     void ApplyCMVN(kaldi::MatrixBase<BaseFloat>* feats);
     kaldi::Matrix<double> stats_;
-    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
     size_t dim_;
     bool var_norm_;
 };
diff --git a/speechx/speechx/nnet/decodable.cc b/speechx/speechx/nnet/decodable.cc
index e6315d07..542168d2 100644
--- a/speechx/speechx/nnet/decodable.cc
+++ b/speechx/speechx/nnet/decodable.cc
@@ -22,7 +22,7 @@ using std::vector;
 using kaldi::Vector;
 
 Decodable::Decodable(const std::shared_ptr<NnetInterface>& nnet,
-                     const std::shared_ptr<FeatureExtractorInterface>& frontend)
+                     const std::shared_ptr<FrontendInterface>& frontend)
     : frontend_(frontend), nnet_(nnet), frame_offset_(0), frames_ready_(0) {}
 
 void Decodable::Acceptlikelihood(const Matrix<BaseFloat>& likelihood) {
diff --git a/speechx/speechx/nnet/decodable.h b/speechx/speechx/nnet/decodable.h
index 7938b582..ef17601f 100644
--- a/speechx/speechx/nnet/decodable.h
+++ b/speechx/speechx/nnet/decodable.h
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "nnet/decodable-itf.h"
 #include "nnet/nnet_interface.h"
@@ -26,7 +26,7 @@ class Decodable : public kaldi::DecodableInterface {
   public:
     explicit Decodable(
         const std::shared_ptr<NnetInterface>& nnet,
-        const std::shared_ptr<FeatureExtractorInterface>& frontend);
+        const std::shared_ptr<FrontendInterface>& frontend);
     // void Init(DecodableOpts config);
     virtual kaldi::BaseFloat LogLikelihood(int32 frame, int32 index);
     virtual bool IsLastFrame(int32 frame) const;
@@ -41,7 +41,7 @@ class Decodable : public kaldi::DecodableInterface {
 
   private:
     bool AdvanceChunk();
-    std::shared_ptr<FeatureExtractorInterface> frontend_;
+    std::shared_ptr<FrontendInterface> frontend_;
     std::shared_ptr<NnetInterface> nnet_;
     kaldi::Matrix<kaldi::BaseFloat> nnet_cache_;
     // std::vector<std::vector<kaldi::BaseFloat>> nnet_cache_;