refactor frontend

3 years ago · 156ccfe4e3
parent 2fd904e9be
commit 156ccfe4e3
11 changed files with 238 additions and 242 deletions
--- a/speechx/speechx/decoder/param.h
+++ b/speechx/speechx/decoder/param.h
@ -62,7 +62,6 @@ namespace ppspeech {
 FeaturePipelineOptions InitFeaturePipelineOptions() {
    FeaturePipelineOptions opts;
    opts.cmvn_file = FLAGS_cmvn_file;
    opts.linear_spectrogram_opts.streaming_chunk = FLAGS_streaming_chunk;
    kaldi::FrameExtractionOptions frame_opts;
    frame_opts.dither = 0.0;
    frame_opts.frame_shift_ms = 10;
@ -71,8 +70,8 @@ FeaturePipelineOptions InitFeaturePipelineOptions() {
        opts.to_float32 = false;
        frame_opts.window_type = "povey";
        frame_opts.frame_length_ms = 25;
-        opts.fbank_opts.fbank_opts.mel_opts.num_bins = FLAGS_num_bins;
+        opts.fbank_opts.mel_opts.num_bins = FLAGS_num_bins;
-        opts.fbank_opts.fbank_opts.frame_opts = frame_opts;
+        opts.fbank_opts.frame_opts = frame_opts;
    } else {
        opts.to_float32 = true;
        frame_opts.remove_dc_offset = false;
--- a/speechx/speechx/frontend/audio/audio_cache.h
+++ b/speechx/speechx/frontend/audio/audio_cache.h
@ -30,8 +30,8 @@ class AudioCache : public FrontendInterface {
    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
-    // the audio dim is 1, one sample
+    // A single audio frame is one sample (dim 1), but Dim() reports size_ instead.
-    virtual size_t Dim() const { return 1; }
+    virtual size_t Dim() const { return size_; }
    virtual void SetFinished() {
        std::lock_guard<std::mutex> lock(mutex_);
--- a/speechx/speechx/frontend/audio/compute_fbank_main.cc
+++ b/speechx/speechx/frontend/audio/compute_fbank_main.cc
@ -49,12 +49,11 @@ int main(int argc, char* argv[]) {
    std::unique_ptr<ppspeech::FrontendInterface> data_source(
        new ppspeech::AudioCache(3600 * 1600, false));
-    ppspeech::FbankOptions opt;
+    kaldi::FbankOptions opt;
-    opt.fbank_opts.frame_opts.frame_length_ms = 25;
+    opt.frame_opts.frame_length_ms = 25;
-    opt.fbank_opts.frame_opts.frame_shift_ms = 10;
+    opt.frame_opts.frame_shift_ms = 10;
-    opt.streaming_chunk = FLAGS_streaming_chunk;
+    opt.mel_opts.num_bins = FLAGS_num_bins;
-    opt.fbank_opts.mel_opts.num_bins = FLAGS_num_bins;
+    opt.frame_opts.dither = 0.0;
    opt.fbank_opts.frame_opts.dither = 0.0;
    std::unique_ptr<ppspeech::FrontendInterface> fbank(
        new ppspeech::Fbank(opt, std::move(data_source)));
--- a/speechx/speechx/frontend/audio/compute_linear_spectrogram_main.cc
+++ b/speechx/speechx/frontend/audio/compute_linear_spectrogram_main.cc
@ -49,7 +49,6 @@ int main(int argc, char* argv[]) {
    ppspeech::LinearSpectrogramOptions opt;
    opt.frame_opts.frame_length_ms = 20;
    opt.frame_opts.frame_shift_ms = 10;
    opt.streaming_chunk = FLAGS_streaming_chunk;
    opt.frame_opts.dither = 0.0;
    opt.frame_opts.remove_dc_offset = false;
    opt.frame_opts.window_type = "hanning";
--- a/speechx/speechx/frontend/audio/fbank.cc
+++ b/speechx/speechx/frontend/audio/fbank.cc
@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "frontend/audio/fbank.h"
 #include "kaldi/base/kaldi-math.h"
 #include "kaldi/feat/feature-common.h"
@ -29,95 +28,33 @@ using kaldi::VectorBase;
 using kaldi::Matrix;
 using std::vector;
-// todo refactor later:(SmileGoat)
+FbankComputer::FbankComputer(const Options& opts)
 Fbank::Fbank(const FbankOptions& opts,
             std::unique_ptr<FrontendInterface> base_extractor)
    : opts_(opts),
-      computer_(opts.fbank_opts),
+    computer_(opts) {}
      window_function_(opts.fbank_opts.frame_opts) {
    base_extractor_ = std::move(base_extractor);
    chunk_sample_size_ = static_cast<int32>(
        opts.streaming_chunk * opts.fbank_opts.frame_opts.samp_freq);
 }
-void Fbank::Accept(const VectorBase<BaseFloat>& inputs) {
+int32 FbankComputer::Dim() const {
-    base_extractor_->Accept(inputs);
+    return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
 }
-bool Fbank::Read(Vector<BaseFloat>* feats) {
+bool FbankComputer::NeedRawLogEnergy() {
-    Vector<BaseFloat> wav(chunk_sample_size_);
+    return opts_.use_energy && opts_.raw_energy; 
    bool flag = base_extractor_->Read(&wav);
    if (flag == false || wav.Dim() == 0) return false;
    // append remaned waves
    int32 wav_len = wav.Dim();
    int32 left_len = remained_wav_.Dim();
    Vector<BaseFloat> waves(left_len + wav_len);
    waves.Range(0, left_len).CopyFromVec(remained_wav_);
    waves.Range(left_len, wav_len).CopyFromVec(wav);
    // compute speech feature
    Compute(waves, feats);
    // cache remaned waves
    kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions();
    int32 num_frames = kaldi::NumFrames(waves.Dim(), frame_opts);
    int32 frame_shift = frame_opts.WindowShift();
    int32 left_samples = waves.Dim() - frame_shift * num_frames;
    remained_wav_.Resize(left_samples);
    remained_wav_.CopyFromVec(
        waves.Range(frame_shift * num_frames, left_samples));
    return true;
 }
-// Compute spectrogram feat
+// Compute feat
-bool Fbank::Compute(const Vector<BaseFloat>& waves, Vector<BaseFloat>* feats) {
+bool FbankComputer::Compute(Vector<BaseFloat>* window, Vector<BaseFloat>* feat) {
-    const kaldi::FrameExtractionOptions& frame_opts =
+    RealFft(window, true);
-        computer_.GetFrameOptions();
+    kaldi::ComputePowerSpectrum(window);
-    int32 num_samples = waves.Dim();
+    const kaldi::MelBanks& mel_bank = *(computer_.GetMelBanks(1.0));
-    int32 frame_length = frame_opts.WindowSize();
+    SubVector<BaseFloat> power_spectrum(*window, 0, window->Dim() / 2 + 1);
-    int32 sample_rate = frame_opts.samp_freq;
+    if (!opts_.use_power) {
-    if (num_samples < frame_length) {
+        power_spectrum.ApplyPow(0.5);
        return true;
    }
    int32 num_frames = kaldi::NumFrames(num_samples, frame_opts);
    feats->Resize(num_frames * Dim());
    Vector<BaseFloat> window;
    bool need_raw_log_energy = computer_.NeedRawLogEnergy();
    for (int32 frame = 0; frame < num_frames; frame++) {
        BaseFloat raw_log_energy = 0.0;
        kaldi::ExtractWindow(0,
                             waves,
                             frame,
                             frame_opts,
                             window_function_,
                             &window,
                             need_raw_log_energy ? &raw_log_energy : NULL);
        Vector<BaseFloat> this_feature(computer_.Dim(), kaldi::kUndefined);
        // note: this online feature-extraction code does not support VTLN.
        RealFft(&window, true);
        kaldi::ComputePowerSpectrum(&window);
        const kaldi::MelBanks& mel_bank = *(computer_.GetMelBanks(1.0));
        SubVector<BaseFloat> power_spectrum(window, 0, window.Dim() / 2 + 1);
        if (!opts_.fbank_opts.use_power) {
            power_spectrum.ApplyPow(0.5);
        }
        int32 mel_offset =
            ((opts_.fbank_opts.use_energy && !opts_.fbank_opts.htk_compat) ? 1
                                                                           : 0);
        SubVector<BaseFloat> mel_energies(
            this_feature, mel_offset, opts_.fbank_opts.mel_opts.num_bins);
        mel_bank.Compute(power_spectrum, &mel_energies);
        mel_energies.ApplyFloor(1e-07);
        mel_energies.ApplyLog();
        SubVector<BaseFloat> output_row(feats->Data() + frame * Dim(), Dim());
        output_row.CopyFromVec(this_feature);
    }
    int32 mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0);
    SubVector<BaseFloat> mel_energies(
        *feat, mel_offset, opts_.mel_opts.num_bins);
    mel_bank.Compute(power_spectrum, &mel_energies);
    mel_energies.ApplyFloor(1e-07);
    mel_energies.ApplyLog();
    return true;
 }
--- a/speechx/speechx/frontend/audio/fbank.h
+++ b/speechx/speechx/frontend/audio/fbank.h
@ -16,62 +16,35 @@
 #include "base/common.h"
 #include "frontend/audio/frontend_itf.h"
 #include "frontend/audio/feature_common.h"
 #include "kaldi/feat/feature-fbank.h"
 #include "kaldi/feat/feature-mfcc.h"
 #include "kaldi/matrix/kaldi-vector.h"
 namespace ppspeech {
-struct FbankOptions {
+class FbankComputer {
    kaldi::FbankOptions fbank_opts;
    kaldi::BaseFloat streaming_chunk;  // second
    FbankOptions() : streaming_chunk(0.1), fbank_opts() {}
    void Register(kaldi::OptionsItf* opts) {
        opts->Register("streaming-chunk",
                       &streaming_chunk,
                       "streaming chunk size, default: 0.1 sec");
        fbank_opts.Register(opts);
    }
 };
 class Fbank : public FrontendInterface {
  public:
-    explicit Fbank(const FbankOptions& opts,
+    typedef kaldi::FbankOptions Options;     
-                   std::unique_ptr<FrontendInterface> base_extractor);
+    explicit FbankComputer(const Options& opts);
    virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
-    // the dim_ is the dim of single frame feature
+    kaldi::FrameExtractionOptions& GetFrameOptions() {
-    virtual size_t Dim() const { return computer_.Dim(); }
+      return opts_.frame_opts;
-
+    }
    virtual void SetFinished() { base_extractor_->SetFinished(); }
-    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
+    bool Compute(kaldi::Vector<kaldi::BaseFloat>* window,
                 kaldi::Vector<kaldi::BaseFloat>* feat);
    int32 Dim() const;
-    virtual void Reset() {
+    bool NeedRawLogEnergy();
        base_extractor_->Reset();
        remained_wav_.Resize(0);
    }
  private:
-    bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves,
+    Options opts_;
                 kaldi::Vector<kaldi::BaseFloat>* feats);
    FbankOptions opts_;
    std::unique_ptr<FrontendInterface> base_extractor_;
    kaldi::FeatureWindowFunction window_function_;
    kaldi::FbankComputer computer_;
-    // features_ is the Mfcc or Plp or Fbank features that we have already
+    //DISALLOW_COPY_AND_ASSIGN(FbankComputer);
    // computed.
    kaldi::Vector<kaldi::BaseFloat> features_;
    kaldi::Vector<kaldi::BaseFloat> remained_wav_;
    kaldi::int32 chunk_sample_size_;
    DISALLOW_COPY_AND_ASSIGN(Fbank);
 };
 typedef StreamingFeatureTpl<FbankComputer> Fbank;
 }  // namespace ppspeech
--- a/speechx/speechx/frontend/audio/feature_common.h
+++ b/speechx/speechx/frontend/audio/feature_common.h
@ -0,0 +1,54 @@
 // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include "frontend_itf.h"
 #include "kaldi/feat/feature-window.h"
 namespace ppspeech {
 // Streaming feature extractor, parameterised on a per-frame computer F.
 //
 // F is expected to provide (as used by this template's implementation):
 //   - a nested type F::Options, constructible into F and exposing frame_opts
 //   - GetFrameOptions(), Dim(), NeedRawLogEnergy()
 //   - Compute(window, feat), which turns one extracted window into one
 //     feature frame
 //
 // The template itself pulls samples from an upstream FrontendInterface,
 // slices them into frames and keeps the unconsumed tail between reads.
 template <class F>
 class StreamingFeatureTpl : public FrontendInterface {
   public:
     // Option type is dictated by the computer.
     using Options = typename F::Options;
     // Takes ownership of base_extractor, the stage samples are read from.
     StreamingFeatureTpl(const Options& opts,
                         std::unique_ptr<FrontendInterface> base_extractor);
     // Forwards incoming samples to the upstream stage.
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
     // Reads upstream samples and writes the resulting features into feats.
     virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
     // Dimension of a single feature frame, as reported by the computer.
     virtual size_t Dim() const {
         return computer_.Dim();
     }
     // Finished state is owned by the upstream stage; these only delegate.
     virtual void SetFinished() {
         base_extractor_->SetFinished();
     }
     virtual bool IsFinished() const {
         return base_extractor_->IsFinished();
     }
     // Resets the upstream stage and drops any cached, unconsumed samples.
     virtual void Reset() {
         base_extractor_->Reset();
         remained_wav_.Resize(0);
     }
   private:
     // Frames `waves` and fills feats with one computer output per frame.
     bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves,
                  kaldi::Vector<kaldi::BaseFloat>* feats);
     // NOTE: members are initialised in this declaration order.
     Options opts_;
     std::unique_ptr<FrontendInterface> base_extractor_;
     kaldi::FeatureWindowFunction window_function_;
     // Samples left over from the previous Read() that did not fill a frame.
     kaldi::Vector<kaldi::BaseFloat> remained_wav_;
     F computer_;
 };
 }  // namespace ppspeech
 #include "frontend/audio/feature_common_inl.h"
--- a/speechx/speechx/frontend/audio/feature_common_inl.h
+++ b/speechx/speechx/frontend/audio/feature_common_inl.h
@ -0,0 +1,95 @@
 // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 namespace ppspeech {
 // Builds the streaming extractor.
 //   opts           - options for the computer F; opts.frame_opts also
 //                    configures the window function.
 //   base_extractor - upstream stage that samples are read from; ownership
 //                    is transferred to this object.
 // Initialisers are listed in member declaration order (opts_,
 // base_extractor_, window_function_, computer_), which is the order in
 // which they actually run, and base_extractor_ is initialised directly
 // instead of being default-constructed and then assigned.
 template <class F>
 StreamingFeatureTpl<F>::StreamingFeatureTpl(
     const Options& opts, std::unique_ptr<FrontendInterface> base_extractor)
     : opts_(opts),
       base_extractor_(std::move(base_extractor)),
       window_function_(opts.frame_opts),
       computer_(opts) {}
 // Hands the incoming samples straight to the upstream stage; nothing is
 // buffered or processed here.
 template <class F>
 void StreamingFeatureTpl<F>::Accept(
     const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
     base_extractor_->Accept(inputs);
 }
 // Reads one batch of samples from the upstream stage, prepends the samples
 // left over from the previous call, computes features for every complete
 // frame and caches the unconsumed tail for the next call.
 //
 // Returns false when the upstream Read() fails or delivers no samples;
 // returns true otherwise (the result of Compute() is not inspected).
 template <class F>
 bool StreamingFeatureTpl<F>::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
     // The read buffer is sized by the upstream stage's Dim().
     kaldi::Vector<kaldi::BaseFloat> wav(base_extractor_->Dim());
     const bool flag = base_extractor_->Read(&wav);
     if (!flag || wav.Dim() == 0) return false;
     // prepend the remaining samples from the previous call
     const int32 wav_len = wav.Dim();
     const int32 left_len = remained_wav_.Dim();
     kaldi::Vector<kaldi::BaseFloat> waves(left_len + wav_len);
     waves.Range(0, left_len).CopyFromVec(remained_wav_);
     waves.Range(left_len, wav_len).CopyFromVec(wav);
     // compute speech feature
     Compute(waves, feats);
     // cache the remaining samples: everything past the last frame shift
     // that was consumed. Bind the options by reference; copying the whole
     // options struct on every read is unnecessary.
     const kaldi::FrameExtractionOptions& frame_opts =
         computer_.GetFrameOptions();
     const int32 num_frames = kaldi::NumFrames(waves.Dim(), frame_opts);
     const int32 consumed = frame_opts.WindowShift() * num_frames;
     const int32 left_samples = waves.Dim() - consumed;
     // kUndefined: the buffer is fully overwritten by the copy below, so
     // zero-filling it first would be wasted work.
     remained_wav_.Resize(left_samples, kaldi::kUndefined);
     remained_wav_.CopyFromVec(waves.Range(consumed, left_samples));
     return true;
 }
 // Splits `waves` into frames and writes one feature row per frame into
 // *feats, laid out contiguously as num_frames * Dim() values.
 //
 // When `waves` holds fewer samples than one window, no frame can be
 // produced: *feats is emptied so that a reused output vector never carries
 // stale features from an earlier call. Always returns true.
 template <class F>
 bool StreamingFeatureTpl<F>::Compute(
     const kaldi::Vector<kaldi::BaseFloat>& waves,
     kaldi::Vector<kaldi::BaseFloat>* feats) {
     const kaldi::FrameExtractionOptions& frame_opts =
         computer_.GetFrameOptions();
     const int32 num_samples = waves.Dim();
     const int32 frame_length = frame_opts.WindowSize();
     if (num_samples < frame_length) {
         feats->Resize(0);
         return true;
     }
     const int32 num_frames = kaldi::NumFrames(num_samples, frame_opts);
     // Dim() is virtual and constant for the duration of this call.
     const int32 feat_dim = Dim();
     feats->Resize(num_frames * feat_dim);
     kaldi::Vector<kaldi::BaseFloat> window;
     // Scratch buffer for a single frame, reused across iterations; the
     // computer is responsible for filling it.
     kaldi::Vector<kaldi::BaseFloat> this_feature(computer_.Dim(),
                                                  kaldi::kUndefined);
     const bool need_raw_log_energy = computer_.NeedRawLogEnergy();
     for (int32 frame = 0; frame < num_frames; frame++) {
         // raw_log_energy is requested from ExtractWindow when the computer
         // asks for it, but it is not forwarded to the computer here.
         kaldi::BaseFloat raw_log_energy = 0.0;
         kaldi::ExtractWindow(0,
                              waves,
                              frame,
                              frame_opts,
                              window_function_,
                              &window,
                              need_raw_log_energy ? &raw_log_energy : nullptr);
         computer_.Compute(&window, &this_feature);
         kaldi::SubVector<kaldi::BaseFloat> output_row(
             feats->Data() + frame * feat_dim, feat_dim);
         output_row.CopyFromVec(this_feature);
     }
     return true;
 }
 }  // namespace ppspeech
--- a/speechx/speechx/frontend/audio/feature_pipeline.h
+++ b/speechx/speechx/frontend/audio/feature_pipeline.h
@ -32,7 +32,7 @@ struct FeaturePipelineOptions {
    bool to_float32;  // true, only for linear feature
    bool use_fbank;
    LinearSpectrogramOptions linear_spectrogram_opts;
-    FbankOptions fbank_opts;
+    kaldi::FbankOptions fbank_opts;
    FeatureCacheOptions feature_cache_opts;
    AssemblerOptions assembler_opts;
--- a/speechx/speechx/frontend/audio/linear_spectrogram.cc
+++ b/speechx/speechx/frontend/audio/linear_spectrogram.cc
@ -28,81 +28,32 @@ using kaldi::VectorBase;
 using kaldi::Matrix;
 using std::vector;
-LinearSpectrogram::LinearSpectrogram(
+LinearSpectrogramComputer::LinearSpectrogramComputer(
-    const LinearSpectrogramOptions& opts,
+    const Options& opts)
-    std::unique_ptr<FrontendInterface> base_extractor)
+    : opts_(opts) {
-    : opts_(opts), feature_window_funtion_(opts.frame_opts) {
+    kaldi::FeatureWindowFunction feature_window_function(opts.frame_opts);
    base_extractor_ = std::move(base_extractor);
    int32 window_size = opts.frame_opts.WindowSize();
-    int32 window_shift = opts.frame_opts.WindowShift();
+    frame_length_ = window_size;
    dim_ = window_size / 2 + 1;
-    chunk_sample_size_ =
+    BaseFloat hanning_window_energy = kaldi::VecVec(feature_window_function.window,
-        static_cast<int32>(opts.streaming_chunk * opts.frame_opts.samp_freq);
+                                          feature_window_function.window);
-    hanning_window_energy_ = kaldi::VecVec(feature_window_funtion_.window,
+    int32 sample_rate = opts.frame_opts.samp_freq;
-                                           feature_window_funtion_.window);
+    scale_ = 2.0 / (hanning_window_energy * sample_rate);
 }
 void LinearSpectrogram::Accept(const VectorBase<BaseFloat>& inputs) {
    base_extractor_->Accept(inputs);
 }
 bool LinearSpectrogram::Read(Vector<BaseFloat>* feats) {
    Vector<BaseFloat> input_feats(chunk_sample_size_);
    bool flag = base_extractor_->Read(&input_feats);
    if (flag == false || input_feats.Dim() == 0) return false;
    int32 feat_len = input_feats.Dim();
    int32 left_len = remained_wav_.Dim();
    Vector<BaseFloat> waves(feat_len + left_len);
    waves.Range(0, left_len).CopyFromVec(remained_wav_);
    waves.Range(left_len, feat_len).CopyFromVec(input_feats);
    Compute(waves, feats);
    int32 frame_shift = opts_.frame_opts.WindowShift();
    int32 num_frames = kaldi::NumFrames(waves.Dim(), opts_.frame_opts);
    int32 left_samples = waves.Dim() - frame_shift * num_frames;
    remained_wav_.Resize(left_samples);
    remained_wav_.CopyFromVec(
        waves.Range(frame_shift * num_frames, left_samples));
    return true;
 }
 // Compute spectrogram feat
-bool LinearSpectrogram::Compute(const Vector<BaseFloat>& waves,
+bool LinearSpectrogramComputer::Compute(Vector<BaseFloat>* window,
-                                Vector<BaseFloat>* feats) {
+                                Vector<BaseFloat>* feat) {
-    int32 num_samples = waves.Dim();
+    window->Resize(frame_length_, kaldi::kCopyData);
-    int32 frame_length = opts_.frame_opts.WindowSize();
+    RealFft(window, true);
-    int32 sample_rate = opts_.frame_opts.samp_freq;
+    kaldi::ComputePowerSpectrum(window);
-    BaseFloat scale = 2.0 / (hanning_window_energy_ * sample_rate);
+    SubVector<BaseFloat> power_spectrum(*window, 0, dim_);
-
+    power_spectrum.Scale(scale_);
-    if (num_samples < frame_length) {
+    power_spectrum(0) = power_spectrum(0) / 2;
-        return true;
+    power_spectrum(dim_ - 1) = power_spectrum(dim_ - 1) / 2;
-    }
+    power_spectrum.Add(1e-14);
-
+    power_spectrum.ApplyLog();
-    int32 num_frames = kaldi::NumFrames(num_samples, opts_.frame_opts);
+    feat->CopyFromVec(power_spectrum);
    feats->Resize(num_frames * dim_);
    Vector<BaseFloat> window;
    for (int frame_idx = 0; frame_idx < num_frames; ++frame_idx) {
        kaldi::ExtractWindow(0,
                             waves,
                             frame_idx,
                             opts_.frame_opts,
                             feature_window_funtion_,
                             &window,
                             NULL);
        SubVector<BaseFloat> output_row(feats->Data() + frame_idx * dim_, dim_);
        window.Resize(frame_length, kaldi::kCopyData);
        RealFft(&window, true);
        kaldi::ComputePowerSpectrum(&window);
        SubVector<BaseFloat> power_spectrum(window, 0, dim_);
        power_spectrum.Scale(scale);
        power_spectrum(0) = power_spectrum(0) / 2;
        power_spectrum(dim_ - 1) = power_spectrum(dim_ - 1) / 2;
        power_spectrum.Add(1e-14);
        power_spectrum.ApplyLog();
        output_row.CopyFromVec(power_spectrum);
    }
    return true;
 }
--- a/speechx/speechx/frontend/audio/linear_spectrogram.h
+++ b/speechx/speechx/frontend/audio/linear_spectrogram.h
@ -18,52 +18,41 @@
 #include "base/common.h"
 #include "frontend/audio/frontend_itf.h"
 #include "kaldi/feat/feature-window.h"
 #include "frontend/audio/feature_common.h"
 namespace ppspeech {
 struct LinearSpectrogramOptions {
    kaldi::FrameExtractionOptions frame_opts;
-    kaldi::BaseFloat streaming_chunk;  // second
+    LinearSpectrogramOptions(): frame_opts() {}
    LinearSpectrogramOptions() : streaming_chunk(0.1), frame_opts() {}
    void Register(kaldi::OptionsItf* opts) {
        opts->Register("streaming-chunk",
                       &streaming_chunk,
                       "streaming chunk size, default: 0.1 sec");
        frame_opts.Register(opts);
    }
 };
-class LinearSpectrogram : public FrontendInterface {
+class LinearSpectrogramComputer {
  public:
-    explicit LinearSpectrogram(
+    typedef LinearSpectrogramOptions Options;
-        const LinearSpectrogramOptions& opts,
+    explicit LinearSpectrogramComputer(const Options& opts);
-        std::unique_ptr<FrontendInterface> base_extractor);
+
-    virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
+    kaldi::FrameExtractionOptions& GetFrameOptions() {
-    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
+      return opts_.frame_opts;
    // the dim_ is the dim of single frame feature
    virtual size_t Dim() const { return dim_; }
    virtual void SetFinished() { base_extractor_->SetFinished(); }
    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
    virtual void Reset() {
        base_extractor_->Reset();
        remained_wav_.Resize(0);
    }
-  private:
+    bool Compute(kaldi::Vector<kaldi::BaseFloat>* window,
-    bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves,
+                 kaldi::Vector<kaldi::BaseFloat>* feat);
                 kaldi::Vector<kaldi::BaseFloat>* feats);
-    size_t dim_;
+    int32 Dim() const { return dim_; }
-    kaldi::FeatureWindowFunction feature_window_funtion_;
+
-    kaldi::BaseFloat hanning_window_energy_;
+    bool NeedRawLogEnergy() { return false; }
-    LinearSpectrogramOptions opts_;
+    
-    std::unique_ptr<FrontendInterface> base_extractor_;
+  private:
-    kaldi::Vector<kaldi::BaseFloat> remained_wav_;
+    kaldi::BaseFloat scale_;
-    int chunk_sample_size_;
+    Options opts_;
-    DISALLOW_COPY_AND_ASSIGN(LinearSpectrogram);
+    int32 frame_length_;
    int32 dim_;
 };
 typedef StreamingFeatureTpl<LinearSpectrogramComputer> LinearSpectrogram;
    //DISALLOW_COPY_AND_ASSIGN(LinearSpectrogram);
 }  // namespace ppspeech