rename arg of Accept & Read

4 years ago · 027feae9f2
parent 22fe1c9dbe
commit 027feae9f2
9 changed files with 63 additions and 61 deletions
--- a/speechx/speechx/frontend/feature_cache.cc
+++ b/speechx/speechx/frontend/feature_cache.cc
@ -40,7 +40,7 @@ void FeatureCache::Accept(
 }

 // pop feature chunk
-bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* output_feats) {
+bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
    kaldi::Timer timer;
    std::unique_lock<std::mutex> lock(mutex_);
    while (cache_.empty() && base_extractor_->IsFinished() == false) {
@ -53,8 +53,8 @@ bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* output_feats) {
        usleep(1000);  // sleep 1 ms
    }
    if (cache_.empty()) return false;
-    output_feats->Resize(cache_.front().Dim());
-    output_feats->CopyFromVec(cache_.front());
+    feats->Resize(cache_.front().Dim());
+    feats->CopyFromVec(cache_.front());
    cache_.pop();
    ready_feed_condition_.notify_one();
    return true;
--- a/speechx/speechx/frontend/feature_cache.h
+++ b/speechx/speechx/frontend/feature_cache.h
@ -26,8 +26,8 @@ class FeatureCache : public FeatureExtractorInterface {
        std::unique_ptr<FeatureExtractorInterface> base_extractor = NULL);
    virtual void Accept(
        const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
-    // output_feats dim = num_frames * feature_dim
-    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* output_feats);
+    // feats dim = num_frames * feature_dim
+    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
    // feature cache only cache feature which from base extractor
    virtual size_t Dim() const { return base_extractor_->Dim(); }
    virtual void SetFinished() {
--- a/speechx/speechx/frontend/feature_extractor_interface.h
+++ b/speechx/speechx/frontend/feature_extractor_interface.h
@ -21,7 +21,8 @@ namespace ppspeech {

 class FeatureExtractorInterface {
  public:
-    // accept input data
+    // accept input data: either features or raw waves, as determined
+    // by the base_extractor
    virtual void Accept(
        const kaldi::VectorBase<kaldi::BaseFloat>& inputs) = 0;
    // get the processed result
--- a/speechx/speechx/frontend/linear_spectrogram.cc
+++ b/speechx/speechx/frontend/linear_spectrogram.cc
@ -70,7 +70,7 @@ void LinearSpectrogram::Accept(const VectorBase<BaseFloat>& inputs) {
    base_extractor_->Accept(inputs);
 }

-bool LinearSpectrogram::Read(Vector<BaseFloat>* output_feats) {
+bool LinearSpectrogram::Read(Vector<BaseFloat>* feats) {
    Vector<BaseFloat> input_feats(chunk_sample_size_);
    bool flag = base_extractor_->Read(&input_feats);
    if (flag == false || input_feats.Dim() == 0) return false;
@ -83,10 +83,10 @@ bool LinearSpectrogram::Read(Vector<BaseFloat>* output_feats) {
    if (result.size() != 0) {
        feat_size = result.size() * result[0].size();
    }
-    output_feats->Resize(feat_size);
+    feats->Resize(feat_size);
    // todo refactor (SimleGoat)
    for (size_t idx = 0; idx < feat_size; ++idx) {
-        (*output_feats)(idx) = result[idx / dim_][idx % dim_];
+        (*feats)(idx) = result[idx / dim_][idx % dim_];
    }
    return true;
 }
@ -120,9 +120,9 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,

 // Compute spectrogram feat
 // todo: refactor later (SmileGoat)
-bool LinearSpectrogram::Compute(const vector<float>& wave,
-                                vector<vector<float>>& feat) {
-    int num_samples = wave.size();
+bool LinearSpectrogram::Compute(const vector<float>& waves,
+                                vector<vector<float>>& feats) {
+    int num_samples = waves.size();
    const int& frame_length = opts_.frame_opts.WindowSize();
    const int& sample_rate = opts_.frame_opts.samp_freq;
    const int& frame_shift = opts_.frame_opts.WindowShift();
@ -134,34 +134,34 @@ bool LinearSpectrogram::Compute(const vector<float>& wave,
    }

    int num_frames = 1 + ((num_samples - frame_length) / frame_shift);
-    feat.resize(num_frames);
+    feats.resize(num_frames);
    vector<float> fft_real((fft_points_ / 2 + 1), 0);
    vector<float> fft_img((fft_points_ / 2 + 1), 0);
    vector<float> v(frame_length, 0);
    vector<float> power((fft_points / 2 + 1));

    for (int i = 0; i < num_frames; ++i) {
-        vector<float> data(wave.data() + i * frame_shift,
-                           wave.data() + i * frame_shift + frame_length);
+        vector<float> data(waves.data() + i * frame_shift,
+                           waves.data() + i * frame_shift + frame_length);
        Hanning(&data);
        fft_img.clear();
        fft_real.clear();
        v.assign(data.begin(), data.end());
        NumpyFft(&v, &fft_real, &fft_img);

-        feat[i].resize(fft_points / 2 + 1);  // the last dimension is Fs/2 Hz
+        feats[i].resize(fft_points / 2 + 1);  // the last dimension is Fs/2 Hz
        for (int j = 0; j < (fft_points / 2 + 1); ++j) {
            power[j] = fft_real[j] * fft_real[j] + fft_img[j] * fft_img[j];
-            feat[i][j] = power[j];
+            feats[i][j] = power[j];

-            if (j == 0 || j == feat[0].size() - 1) {
-                feat[i][j] /= scale;
+            if (j == 0 || j == feats[0].size() - 1) {
+                feats[i][j] /= scale;
            } else {
-                feat[i][j] *= (2.0 / scale);
+                feats[i][j] *= (2.0 / scale);
            }

            // log added eps=1e-14
-            feat[i][j] = std::log(feat[i][j] + 1e-14);
+            feats[i][j] = std::log(feats[i][j] + 1e-14);
        }
    }
    return true;
--- a/speechx/speechx/frontend/linear_spectrogram.h
+++ b/speechx/speechx/frontend/linear_spectrogram.h
@ -40,7 +40,7 @@ class LinearSpectrogram : public FeatureExtractorInterface {
        std::unique_ptr<FeatureExtractorInterface> base_extractor);
    virtual void Accept(
        const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
-    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* output_feats);
+    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
    // the dim_ is the dim of single frame feature
    virtual size_t Dim() const { return dim_; }
    virtual void SetFinished() { base_extractor_->SetFinished(); }
@ -48,8 +48,8 @@ class LinearSpectrogram : public FeatureExtractorInterface {

  private:
    void Hanning(std::vector<kaldi::BaseFloat>* data) const;
-    bool Compute(const std::vector<kaldi::BaseFloat>& wave,
-                 std::vector<std::vector<kaldi::BaseFloat>>& feat);
+    bool Compute(const std::vector<kaldi::BaseFloat>& waves,
+                 std::vector<std::vector<kaldi::BaseFloat>>& feats);
    bool NumpyFft(std::vector<kaldi::BaseFloat>* v,
                  std::vector<kaldi::BaseFloat>* real,
                  std::vector<kaldi::BaseFloat>* img) const;
--- a/speechx/speechx/frontend/normalizer.cc
+++ b/speechx/speechx/frontend/normalizer.cc
@ -35,16 +35,16 @@ DecibelNormalizer::DecibelNormalizer(
 }

 void DecibelNormalizer::Accept(
-    const kaldi::VectorBase<BaseFloat>& inputs_wave) {
-    base_extractor_->Accept(inputs_wave);
+    const kaldi::VectorBase<BaseFloat>& waves) {
+    base_extractor_->Accept(waves);
 }

-bool DecibelNormalizer::Read(kaldi::Vector<BaseFloat>* outputs_wave) {
-    if (base_extractor_->Read(outputs_wave) == false || 
-        outputs_wave->Dim() == 0) {
+bool DecibelNormalizer::Read(kaldi::Vector<BaseFloat>* waves) {
+    if (base_extractor_->Read(waves) == false || 
+        waves->Dim() == 0) {
        return false;
    }
-    Compute(outputs_wave);
+    Compute(waves);
    return true;
 }

@ -67,7 +67,7 @@ void CopyStdVector2Vector(const vector<BaseFloat>& input,
    }
 }

-bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feats) const {
+bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
    // calculate db rms
    BaseFloat rms_db = 0.0;
    BaseFloat mean_square = 0.0;
@ -75,9 +75,9 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feats) const {
    BaseFloat wave_float_normlization = 1.0f / (std::pow(2, 16 - 1));

    vector<BaseFloat> samples;
-    samples.resize(feats->Dim());
+    samples.resize(waves->Dim());
    for (size_t i = 0; i < samples.size(); ++i) {
-        samples[i] = (*feats)(i);
+        samples[i] = (*waves)(i);
    }

    // square
@ -107,7 +107,7 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feats) const {
        item *= std::pow(10.0, gain / 20.0);
    }

-    CopyStdVector2Vector(samples, feats);
+    CopyStdVector2Vector(samples, waves);
    return true;
 }

@ -121,16 +121,16 @@ CMVN::CMVN(std::string cmvn_file,
    dim_ = stats_.NumCols() - 1;
 }

-void CMVN::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& feats) {
-    base_extractor_->Accept(feats);
+void CMVN::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
+    base_extractor_->Accept(inputs);
    return;
 }

-bool CMVN::Read(kaldi::Vector<BaseFloat>* outputs) {
-    if (base_extractor_->Read(outputs) == false) {
+bool CMVN::Read(kaldi::Vector<BaseFloat>* feats) {
+    if (base_extractor_->Read(feats) == false) {
        return false;
    }
-    Compute(outputs);
+    Compute(feats);
    return true;
 }

--- a/speechx/speechx/frontend/normalizer.h
+++ b/speechx/speechx/frontend/normalizer.h
@ -46,15 +46,15 @@ class DecibelNormalizer : public FeatureExtractorInterface {
        const DecibelNormalizerOptions& opts,
        std::unique_ptr<FeatureExtractorInterface> base_extractor);
    virtual void Accept(
-        const kaldi::VectorBase<kaldi::BaseFloat>& inputs_wave);
-    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* outputs_wave);
+        const kaldi::VectorBase<kaldi::BaseFloat>& waves);
+    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
    // noramlize audio, the dim is 1.
    virtual size_t Dim() const { return dim_; }
    virtual void SetFinished() { base_extractor_->SetFinished(); }
    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }

  private:
-    bool Compute(kaldi::VectorBase<kaldi::BaseFloat>* feats) const;
+    bool Compute(kaldi::VectorBase<kaldi::BaseFloat>* waves) const;
    DecibelNormalizerOptions opts_;
    size_t dim_;
    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
@ -67,11 +67,11 @@ class CMVN : public FeatureExtractorInterface {
    explicit CMVN(std::string cmvn_file,
                  std::unique_ptr<FeatureExtractorInterface> base_extractor);
    virtual void Accept(
-        const kaldi::VectorBase<kaldi::BaseFloat>& feats);
+        const kaldi::VectorBase<kaldi::BaseFloat>& inputs);

-    // the length of outputs = feature_row * feature_dim,
+    // the length of feats = feature_row * feature_dim,
    // the Matrix is squashed into Vector
-    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* outputs);
+    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
    // the dim_ is the feautre dim.
    virtual size_t Dim() const { return dim_; }
    virtual void SetFinished() { base_extractor_->SetFinished(); }
--- a/speechx/speechx/frontend/raw_audio.cc
+++ b/speechx/speechx/frontend/raw_audio.cc
@ -26,20 +26,20 @@ RawAudioCache::RawAudioCache(int buffer_size)
    ring_buffer_.resize(buffer_size);
 }

-void RawAudioCache::Accept(const VectorBase<BaseFloat>& input_audio) {
+void RawAudioCache::Accept(const VectorBase<BaseFloat>& waves) {
    std::unique_lock<std::mutex> lock(mutex_);
-    while (data_length_ + input_audio.Dim() > ring_buffer_.size()) {
+    while (data_length_ + waves.Dim() > ring_buffer_.size()) {
        ready_feed_condition_.wait(lock);
    }
-    for (size_t idx = 0; idx < input_audio.Dim(); ++idx) {
+    for (size_t idx = 0; idx < waves.Dim(); ++idx) {
        int32 buffer_idx = (idx + start_) % ring_buffer_.size(); 
-        ring_buffer_[buffer_idx] = input_audio(idx);
+        ring_buffer_[buffer_idx] = waves(idx);
    }
-    data_length_ += input_audio.Dim();
+    data_length_ += waves.Dim();
 }

-bool RawAudioCache::Read(Vector<BaseFloat>* output_audio) {
-    size_t chunk_size = output_audio->Dim();
+bool RawAudioCache::Read(Vector<BaseFloat>* waves) {
+    size_t chunk_size = waves->Dim();
    kaldi::Timer timer;
    std::unique_lock<std::mutex> lock(mutex_);
    while (chunk_size > data_length_) {
@ -61,12 +61,12 @@ bool RawAudioCache::Read(Vector<BaseFloat>* output_audio) {
    // read last chunk data
    if (chunk_size > data_length_) {
        chunk_size = data_length_;
-        output_audio->Resize(chunk_size);
+        waves->Resize(chunk_size);
    }

    for (size_t idx = 0; idx < chunk_size; ++idx) {
        int buff_idx = (start_ + idx) % ring_buffer_.size();
-        output_audio->Data()[idx] = ring_buffer_[buff_idx];
+        waves->Data()[idx] = ring_buffer_[buff_idx];
    }
    data_length_ -= chunk_size;
    start_ = (start_ + chunk_size) % ring_buffer_.size();
--- a/speechx/speechx/frontend/raw_audio.h
+++ b/speechx/speechx/frontend/raw_audio.h
@ -23,8 +23,8 @@ namespace ppspeech {
 class RawAudioCache : public FeatureExtractorInterface {
  public:
    explicit RawAudioCache(int buffer_size = kint16max);
-    virtual void Accept(const kaldi::VectorBase<BaseFloat>& input_audio);
-    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* output_audio);
+    virtual void Accept(const kaldi::VectorBase<BaseFloat>& waves);
+    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
    // the audio dim is 1
    virtual size_t Dim() const { return 1; }
    virtual void SetFinished() {
@ -45,19 +45,20 @@ class RawAudioCache : public FeatureExtractorInterface {
    DISALLOW_COPY_AND_ASSIGN(RawAudioCache);
 };

-// it is a datasource for testing different frontend module.
+// it is a data source for testing different frontend modules.
+// it accepts waves or feats.
 class RawDataCache: public FeatureExtractorInterface {
  public:
    explicit RawDataCache() { finished_ = false; }
    virtual void Accept(
-        const kaldi::VectorBase<kaldi::BaseFloat>& input) {
-        data_ = input;
+        const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
+        data_ = inputs;
    }
-    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feat) {
+    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
        if (data_.Dim() == 0) {
            return false;
        }
-        (*feat) = data_;
+        (*feats) = data_;
        data_.Resize(0);
        return true;
    }