refactor frontend

pull/2003/head
Yang Zhou 3 years ago
parent 2fd904e9be
commit 156ccfe4e3

@ -62,7 +62,6 @@ namespace ppspeech {
FeaturePipelineOptions InitFeaturePipelineOptions() { FeaturePipelineOptions InitFeaturePipelineOptions() {
FeaturePipelineOptions opts; FeaturePipelineOptions opts;
opts.cmvn_file = FLAGS_cmvn_file; opts.cmvn_file = FLAGS_cmvn_file;
opts.linear_spectrogram_opts.streaming_chunk = FLAGS_streaming_chunk;
kaldi::FrameExtractionOptions frame_opts; kaldi::FrameExtractionOptions frame_opts;
frame_opts.dither = 0.0; frame_opts.dither = 0.0;
frame_opts.frame_shift_ms = 10; frame_opts.frame_shift_ms = 10;
@ -71,8 +70,8 @@ FeaturePipelineOptions InitFeaturePipelineOptions() {
opts.to_float32 = false; opts.to_float32 = false;
frame_opts.window_type = "povey"; frame_opts.window_type = "povey";
frame_opts.frame_length_ms = 25; frame_opts.frame_length_ms = 25;
opts.fbank_opts.fbank_opts.mel_opts.num_bins = FLAGS_num_bins; opts.fbank_opts.mel_opts.num_bins = FLAGS_num_bins;
opts.fbank_opts.fbank_opts.frame_opts = frame_opts; opts.fbank_opts.frame_opts = frame_opts;
} else { } else {
opts.to_float32 = true; opts.to_float32 = true;
frame_opts.remove_dc_offset = false; frame_opts.remove_dc_offset = false;

@ -30,8 +30,8 @@ class AudioCache : public FrontendInterface {
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves); virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
// the audio dim is 1, one sample // the audio dim is 1, one sample, we return size_ instead.
virtual size_t Dim() const { return 1; } virtual size_t Dim() const { return size_; }
virtual void SetFinished() { virtual void SetFinished() {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);

@ -49,12 +49,11 @@ int main(int argc, char* argv[]) {
std::unique_ptr<ppspeech::FrontendInterface> data_source( std::unique_ptr<ppspeech::FrontendInterface> data_source(
new ppspeech::AudioCache(3600 * 1600, false)); new ppspeech::AudioCache(3600 * 1600, false));
ppspeech::FbankOptions opt; kaldi::FbankOptions opt;
opt.fbank_opts.frame_opts.frame_length_ms = 25; opt.frame_opts.frame_length_ms = 25;
opt.fbank_opts.frame_opts.frame_shift_ms = 10; opt.frame_opts.frame_shift_ms = 10;
opt.streaming_chunk = FLAGS_streaming_chunk; opt.mel_opts.num_bins = FLAGS_num_bins;
opt.fbank_opts.mel_opts.num_bins = FLAGS_num_bins; opt.frame_opts.dither = 0.0;
opt.fbank_opts.frame_opts.dither = 0.0;
std::unique_ptr<ppspeech::FrontendInterface> fbank( std::unique_ptr<ppspeech::FrontendInterface> fbank(
new ppspeech::Fbank(opt, std::move(data_source))); new ppspeech::Fbank(opt, std::move(data_source)));

@ -49,7 +49,6 @@ int main(int argc, char* argv[]) {
ppspeech::LinearSpectrogramOptions opt; ppspeech::LinearSpectrogramOptions opt;
opt.frame_opts.frame_length_ms = 20; opt.frame_opts.frame_length_ms = 20;
opt.frame_opts.frame_shift_ms = 10; opt.frame_opts.frame_shift_ms = 10;
opt.streaming_chunk = FLAGS_streaming_chunk;
opt.frame_opts.dither = 0.0; opt.frame_opts.dither = 0.0;
opt.frame_opts.remove_dc_offset = false; opt.frame_opts.remove_dc_offset = false;
opt.frame_opts.window_type = "hanning"; opt.frame_opts.window_type = "hanning";

@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "frontend/audio/fbank.h" #include "frontend/audio/fbank.h"
#include "kaldi/base/kaldi-math.h" #include "kaldi/base/kaldi-math.h"
#include "kaldi/feat/feature-common.h" #include "kaldi/feat/feature-common.h"
@ -29,95 +28,33 @@ using kaldi::VectorBase;
using kaldi::Matrix; using kaldi::Matrix;
using std::vector; using std::vector;
// todo refactor later:(SmileGoat) FbankComputer::FbankComputer(const Options& opts)
Fbank::Fbank(const FbankOptions& opts,
std::unique_ptr<FrontendInterface> base_extractor)
: opts_(opts), : opts_(opts),
computer_(opts.fbank_opts), computer_(opts) {}
window_function_(opts.fbank_opts.frame_opts) {
base_extractor_ = std::move(base_extractor);
chunk_sample_size_ = static_cast<int32>(
opts.streaming_chunk * opts.fbank_opts.frame_opts.samp_freq);
}
void Fbank::Accept(const VectorBase<BaseFloat>& inputs) { int32 FbankComputer::Dim() const {
base_extractor_->Accept(inputs); return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
} }
bool Fbank::Read(Vector<BaseFloat>* feats) { bool FbankComputer::NeedRawLogEnergy() {
Vector<BaseFloat> wav(chunk_sample_size_); return opts_.use_energy && opts_.raw_energy;
bool flag = base_extractor_->Read(&wav);
if (flag == false || wav.Dim() == 0) return false;
// append remaned waves
int32 wav_len = wav.Dim();
int32 left_len = remained_wav_.Dim();
Vector<BaseFloat> waves(left_len + wav_len);
waves.Range(0, left_len).CopyFromVec(remained_wav_);
waves.Range(left_len, wav_len).CopyFromVec(wav);
// compute speech feature
Compute(waves, feats);
// cache remaned waves
kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions();
int32 num_frames = kaldi::NumFrames(waves.Dim(), frame_opts);
int32 frame_shift = frame_opts.WindowShift();
int32 left_samples = waves.Dim() - frame_shift * num_frames;
remained_wav_.Resize(left_samples);
remained_wav_.CopyFromVec(
waves.Range(frame_shift * num_frames, left_samples));
return true;
} }
// Compute spectrogram feat // Compute feat
bool Fbank::Compute(const Vector<BaseFloat>& waves, Vector<BaseFloat>* feats) { bool FbankComputer::Compute(Vector<BaseFloat>* window, Vector<BaseFloat>* feat) {
const kaldi::FrameExtractionOptions& frame_opts = RealFft(window, true);
computer_.GetFrameOptions(); kaldi::ComputePowerSpectrum(window);
int32 num_samples = waves.Dim(); const kaldi::MelBanks& mel_bank = *(computer_.GetMelBanks(1.0));
int32 frame_length = frame_opts.WindowSize(); SubVector<BaseFloat> power_spectrum(*window, 0, window->Dim() / 2 + 1);
int32 sample_rate = frame_opts.samp_freq; if (!opts_.use_power) {
if (num_samples < frame_length) { power_spectrum.ApplyPow(0.5);
return true;
}
int32 num_frames = kaldi::NumFrames(num_samples, frame_opts);
feats->Resize(num_frames * Dim());
Vector<BaseFloat> window;
bool need_raw_log_energy = computer_.NeedRawLogEnergy();
for (int32 frame = 0; frame < num_frames; frame++) {
BaseFloat raw_log_energy = 0.0;
kaldi::ExtractWindow(0,
waves,
frame,
frame_opts,
window_function_,
&window,
need_raw_log_energy ? &raw_log_energy : NULL);
Vector<BaseFloat> this_feature(computer_.Dim(), kaldi::kUndefined);
// note: this online feature-extraction code does not support VTLN.
RealFft(&window, true);
kaldi::ComputePowerSpectrum(&window);
const kaldi::MelBanks& mel_bank = *(computer_.GetMelBanks(1.0));
SubVector<BaseFloat> power_spectrum(window, 0, window.Dim() / 2 + 1);
if (!opts_.fbank_opts.use_power) {
power_spectrum.ApplyPow(0.5);
}
int32 mel_offset =
((opts_.fbank_opts.use_energy && !opts_.fbank_opts.htk_compat) ? 1
: 0);
SubVector<BaseFloat> mel_energies(
this_feature, mel_offset, opts_.fbank_opts.mel_opts.num_bins);
mel_bank.Compute(power_spectrum, &mel_energies);
mel_energies.ApplyFloor(1e-07);
mel_energies.ApplyLog();
SubVector<BaseFloat> output_row(feats->Data() + frame * Dim(), Dim());
output_row.CopyFromVec(this_feature);
} }
int32 mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0);
SubVector<BaseFloat> mel_energies(
*feat, mel_offset, opts_.mel_opts.num_bins);
mel_bank.Compute(power_spectrum, &mel_energies);
mel_energies.ApplyFloor(1e-07);
mel_energies.ApplyLog();
return true; return true;
} }

@ -16,62 +16,35 @@
#include "base/common.h" #include "base/common.h"
#include "frontend/audio/frontend_itf.h" #include "frontend/audio/frontend_itf.h"
#include "frontend/audio/feature_common.h"
#include "kaldi/feat/feature-fbank.h" #include "kaldi/feat/feature-fbank.h"
#include "kaldi/feat/feature-mfcc.h" #include "kaldi/feat/feature-mfcc.h"
#include "kaldi/matrix/kaldi-vector.h" #include "kaldi/matrix/kaldi-vector.h"
namespace ppspeech { namespace ppspeech {
struct FbankOptions { class FbankComputer {
kaldi::FbankOptions fbank_opts;
kaldi::BaseFloat streaming_chunk; // second
FbankOptions() : streaming_chunk(0.1), fbank_opts() {}
void Register(kaldi::OptionsItf* opts) {
opts->Register("streaming-chunk",
&streaming_chunk,
"streaming chunk size, default: 0.1 sec");
fbank_opts.Register(opts);
}
};
class Fbank : public FrontendInterface {
public: public:
explicit Fbank(const FbankOptions& opts, typedef kaldi::FbankOptions Options;
std::unique_ptr<FrontendInterface> base_extractor); explicit FbankComputer(const Options& opts);
virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
// the dim_ is the dim of single frame feature kaldi::FrameExtractionOptions& GetFrameOptions() {
virtual size_t Dim() const { return computer_.Dim(); } return opts_.frame_opts;
}
virtual void SetFinished() { base_extractor_->SetFinished(); }
virtual bool IsFinished() const { return base_extractor_->IsFinished(); } bool Compute(kaldi::Vector<kaldi::BaseFloat>* window,
kaldi::Vector<kaldi::BaseFloat>* feat);
int32 Dim() const;
virtual void Reset() { bool NeedRawLogEnergy();
base_extractor_->Reset();
remained_wav_.Resize(0);
}
private: private:
bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves, Options opts_;
kaldi::Vector<kaldi::BaseFloat>* feats);
FbankOptions opts_;
std::unique_ptr<FrontendInterface> base_extractor_;
kaldi::FeatureWindowFunction window_function_;
kaldi::FbankComputer computer_; kaldi::FbankComputer computer_;
// features_ is the Mfcc or Plp or Fbank features that we have already //DISALLOW_COPY_AND_ASSIGN(FbankComputer);
// computed.
kaldi::Vector<kaldi::BaseFloat> features_;
kaldi::Vector<kaldi::BaseFloat> remained_wav_;
kaldi::int32 chunk_sample_size_;
DISALLOW_COPY_AND_ASSIGN(Fbank);
}; };
typedef StreamingFeatureTpl<FbankComputer> Fbank;
} // namespace ppspeech } // namespace ppspeech

@ -0,0 +1,54 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "frontend_itf.h"
#include "kaldi/feat/feature-window.h"
namespace ppspeech {
// Streaming front-end stage that turns audio pulled from an upstream
// FrontendInterface into per-frame features. The per-frame math is delegated
// to the computer type F, which must provide an `Options` type plus the
// GetFrameOptions(), Dim(), NeedRawLogEnergy() and Compute(window, feat)
// members used by this template.
template <class F>
class StreamingFeatureTpl : public FrontendInterface {
  public:
    // Option type is dictated by the wrapped computer.
    using Options = typename F::Options;

    // `opts` configures both the computer and the window function;
    // `base_extractor` is the upstream stage, whose ownership is taken.
    StreamingFeatureTpl(const Options& opts,
                        std::unique_ptr<FrontendInterface> base_extractor);

    // Passes `inputs` on to the upstream stage.
    virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);

    // Reads audio from the upstream stage, prepends the samples left over
    // from the previous call and writes the resulting frame features into
    // `feats`, one frame after another. Returns false when the upstream
    // read fails or yields no samples.
    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);

    // Dimension of a single frame's feature, as reported by the computer.
    virtual size_t Dim() const {
        return computer_.Dim();
    }

    // End-of-stream state lives in the upstream stage; both calls forward.
    virtual void SetFinished() {
        base_extractor_->SetFinished();
    }

    virtual bool IsFinished() const {
        return base_extractor_->IsFinished();
    }

    // Resets the upstream stage and drops any buffered leftover samples.
    virtual void Reset() {
        base_extractor_->Reset();
        remained_wav_.Resize(0);
    }

  private:
    // Extracts every complete frame from `waves` and stores the features
    // of all frames contiguously in `feats`.
    bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves,
                 kaldi::Vector<kaldi::BaseFloat>* feats);

    // NOTE: the declaration order below is also the initialization order.
    Options opts_;
    std::unique_ptr<FrontendInterface> base_extractor_;
    kaldi::FeatureWindowFunction window_function_;
    // Samples not yet consumed by a frame shift; carried to the next Read().
    kaldi::Vector<kaldi::BaseFloat> remained_wav_;
    F computer_;
};
} // namespace ppspeech
#include "frontend/audio/feature_common_inl.h"

@ -0,0 +1,95 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
namespace ppspeech {
// Builds a streaming feature stage on top of `base_extractor`.
//
// `opts` is stored and also used to construct the window function (from
// `opts.frame_opts`) and the feature computer F. Ownership of
// `base_extractor` is transferred to this object.
template <class F>
StreamingFeatureTpl<F>::StreamingFeatureTpl(
    const Options& opts, std::unique_ptr<FrontendInterface> base_extractor)
    // Initializers are listed in member declaration order (opts_,
    // base_extractor_, window_function_, computer_), which is the order in
    // which C++ runs them regardless of how the list is written.
    : opts_(opts),
      base_extractor_(std::move(base_extractor)),
      window_function_(opts.frame_opts),
      computer_(opts) {}
// Hands the incoming samples straight to the upstream stage; nothing else
// is done with them here.
template <class F>
void StreamingFeatureTpl<F>::Accept(
    const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
    base_extractor_->Accept(inputs);
}
// Reads one batch of audio from the upstream stage and converts it to
// features.
//
// The freshly read samples are appended to the samples left over from the
// previous call, features are computed for every complete frame, and the
// samples beyond the last frame shift are cached for the next call.
//
// Returns false when the upstream read fails or delivers no samples;
// returns true otherwise (even if the audio was too short to yield a frame).
template <class F>
bool StreamingFeatureTpl<F>::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
    kaldi::Vector<kaldi::BaseFloat> wav(base_extractor_->Dim());
    const bool has_data = base_extractor_->Read(&wav);
    if (!has_data || wav.Dim() == 0) return false;

    // Prepend the samples remaining from the previous call.
    const int32 wav_len = wav.Dim();
    const int32 left_len = remained_wav_.Dim();
    kaldi::Vector<kaldi::BaseFloat> waves(left_len + wav_len);
    waves.Range(0, left_len).CopyFromVec(remained_wav_);
    waves.Range(left_len, wav_len).CopyFromVec(wav);

    // Compute the speech features.
    Compute(waves, feats);

    // Cache the remaining samples. Bind the frame options by reference
    // (as Compute does) rather than copying the whole struct on every call.
    const kaldi::FrameExtractionOptions& frame_opts =
        computer_.GetFrameOptions();
    const int32 num_frames = kaldi::NumFrames(waves.Dim(), frame_opts);
    const int32 consumed = frame_opts.WindowShift() * num_frames;
    const int32 left_samples = waves.Dim() - consumed;
    remained_wav_.Resize(left_samples);
    remained_wav_.CopyFromVec(waves.Range(consumed, left_samples));
    return true;
}
// Computes the features of every complete frame in `waves`.
//
// `feats` is resized to num_frames * Dim() and filled frame by frame, each
// frame occupying Dim() consecutive values. When `waves` is shorter than one
// analysis window, `feats` is emptied so that a reused output buffer cannot
// be mistaken for fresh features. Always returns true.
template <class F>
bool StreamingFeatureTpl<F>::Compute(
    const kaldi::Vector<kaldi::BaseFloat>& waves,
    kaldi::Vector<kaldi::BaseFloat>* feats) {
    const kaldi::FrameExtractionOptions& frame_opts =
        computer_.GetFrameOptions();
    const int32 num_samples = waves.Dim();
    const int32 frame_length = frame_opts.WindowSize();
    if (num_samples < frame_length) {
        // Not enough audio for a single window: report "no features".
        feats->Resize(0);
        return true;
    }

    const int32 num_frames = kaldi::NumFrames(num_samples, frame_opts);
    const int32 feat_dim = computer_.Dim();
    feats->Resize(num_frames * feat_dim);

    kaldi::Vector<kaldi::BaseFloat> window;
    // Scratch buffer for a single frame, allocated once and reused. Its
    // contents are undefined until the computer writes to it.
    kaldi::Vector<kaldi::BaseFloat> this_feature(feat_dim, kaldi::kUndefined);
    const bool need_raw_log_energy = computer_.NeedRawLogEnergy();

    for (int32 frame = 0; frame < num_frames; frame++) {
        kaldi::BaseFloat raw_log_energy = 0.0;
        kaldi::ExtractWindow(0,
                             waves,
                             frame,
                             frame_opts,
                             window_function_,
                             &window,
                             need_raw_log_energy ? &raw_log_energy : nullptr);

        computer_.Compute(&window, &this_feature);

        // Copy this frame's feature into its slot of the flat output.
        kaldi::SubVector<kaldi::BaseFloat> output_row(
            feats->Data() + frame * feat_dim, feat_dim);
        output_row.CopyFromVec(this_feature);
    }
    return true;
}
} // namespace ppspeech

@ -32,7 +32,7 @@ struct FeaturePipelineOptions {
bool to_float32; // true, only for linear feature bool to_float32; // true, only for linear feature
bool use_fbank; bool use_fbank;
LinearSpectrogramOptions linear_spectrogram_opts; LinearSpectrogramOptions linear_spectrogram_opts;
FbankOptions fbank_opts; kaldi::FbankOptions fbank_opts;
FeatureCacheOptions feature_cache_opts; FeatureCacheOptions feature_cache_opts;
AssemblerOptions assembler_opts; AssemblerOptions assembler_opts;

@ -28,81 +28,32 @@ using kaldi::VectorBase;
using kaldi::Matrix; using kaldi::Matrix;
using std::vector; using std::vector;
LinearSpectrogram::LinearSpectrogram( LinearSpectrogramComputer::LinearSpectrogramComputer(
const LinearSpectrogramOptions& opts, const Options& opts)
std::unique_ptr<FrontendInterface> base_extractor) : opts_(opts) {
: opts_(opts), feature_window_funtion_(opts.frame_opts) { kaldi::FeatureWindowFunction feature_window_function(opts.frame_opts);
base_extractor_ = std::move(base_extractor);
int32 window_size = opts.frame_opts.WindowSize(); int32 window_size = opts.frame_opts.WindowSize();
int32 window_shift = opts.frame_opts.WindowShift(); frame_length_ = window_size;
dim_ = window_size / 2 + 1; dim_ = window_size / 2 + 1;
chunk_sample_size_ = BaseFloat hanning_window_energy = kaldi::VecVec(feature_window_function.window,
static_cast<int32>(opts.streaming_chunk * opts.frame_opts.samp_freq); feature_window_function.window);
hanning_window_energy_ = kaldi::VecVec(feature_window_funtion_.window, int32 sample_rate = opts.frame_opts.samp_freq;
feature_window_funtion_.window); scale_ = 2.0 / (hanning_window_energy * sample_rate);
}
void LinearSpectrogram::Accept(const VectorBase<BaseFloat>& inputs) {
base_extractor_->Accept(inputs);
}
bool LinearSpectrogram::Read(Vector<BaseFloat>* feats) {
Vector<BaseFloat> input_feats(chunk_sample_size_);
bool flag = base_extractor_->Read(&input_feats);
if (flag == false || input_feats.Dim() == 0) return false;
int32 feat_len = input_feats.Dim();
int32 left_len = remained_wav_.Dim();
Vector<BaseFloat> waves(feat_len + left_len);
waves.Range(0, left_len).CopyFromVec(remained_wav_);
waves.Range(left_len, feat_len).CopyFromVec(input_feats);
Compute(waves, feats);
int32 frame_shift = opts_.frame_opts.WindowShift();
int32 num_frames = kaldi::NumFrames(waves.Dim(), opts_.frame_opts);
int32 left_samples = waves.Dim() - frame_shift * num_frames;
remained_wav_.Resize(left_samples);
remained_wav_.CopyFromVec(
waves.Range(frame_shift * num_frames, left_samples));
return true;
} }
// Compute spectrogram feat // Compute spectrogram feat
bool LinearSpectrogram::Compute(const Vector<BaseFloat>& waves, bool LinearSpectrogramComputer::Compute(Vector<BaseFloat>* window,
Vector<BaseFloat>* feats) { Vector<BaseFloat>* feat) {
int32 num_samples = waves.Dim(); window->Resize(frame_length_, kaldi::kCopyData);
int32 frame_length = opts_.frame_opts.WindowSize(); RealFft(window, true);
int32 sample_rate = opts_.frame_opts.samp_freq; kaldi::ComputePowerSpectrum(window);
BaseFloat scale = 2.0 / (hanning_window_energy_ * sample_rate); SubVector<BaseFloat> power_spectrum(*window, 0, dim_);
power_spectrum.Scale(scale_);
if (num_samples < frame_length) { power_spectrum(0) = power_spectrum(0) / 2;
return true; power_spectrum(dim_ - 1) = power_spectrum(dim_ - 1) / 2;
} power_spectrum.Add(1e-14);
power_spectrum.ApplyLog();
int32 num_frames = kaldi::NumFrames(num_samples, opts_.frame_opts); feat->CopyFromVec(power_spectrum);
feats->Resize(num_frames * dim_);
Vector<BaseFloat> window;
for (int frame_idx = 0; frame_idx < num_frames; ++frame_idx) {
kaldi::ExtractWindow(0,
waves,
frame_idx,
opts_.frame_opts,
feature_window_funtion_,
&window,
NULL);
SubVector<BaseFloat> output_row(feats->Data() + frame_idx * dim_, dim_);
window.Resize(frame_length, kaldi::kCopyData);
RealFft(&window, true);
kaldi::ComputePowerSpectrum(&window);
SubVector<BaseFloat> power_spectrum(window, 0, dim_);
power_spectrum.Scale(scale);
power_spectrum(0) = power_spectrum(0) / 2;
power_spectrum(dim_ - 1) = power_spectrum(dim_ - 1) / 2;
power_spectrum.Add(1e-14);
power_spectrum.ApplyLog();
output_row.CopyFromVec(power_spectrum);
}
return true; return true;
} }

@ -18,52 +18,41 @@
#include "base/common.h" #include "base/common.h"
#include "frontend/audio/frontend_itf.h" #include "frontend/audio/frontend_itf.h"
#include "kaldi/feat/feature-window.h" #include "kaldi/feat/feature-window.h"
#include "frontend/audio/feature_common.h"
namespace ppspeech { namespace ppspeech {
struct LinearSpectrogramOptions { struct LinearSpectrogramOptions {
kaldi::FrameExtractionOptions frame_opts; kaldi::FrameExtractionOptions frame_opts;
kaldi::BaseFloat streaming_chunk; // second LinearSpectrogramOptions(): frame_opts() {}
LinearSpectrogramOptions() : streaming_chunk(0.1), frame_opts() {}
void Register(kaldi::OptionsItf* opts) {
opts->Register("streaming-chunk",
&streaming_chunk,
"streaming chunk size, default: 0.1 sec");
frame_opts.Register(opts);
}
}; };
class LinearSpectrogram : public FrontendInterface { class LinearSpectrogramComputer {
public: public:
explicit LinearSpectrogram( typedef LinearSpectrogramOptions Options;
const LinearSpectrogramOptions& opts, explicit LinearSpectrogramComputer(const Options& opts);
std::unique_ptr<FrontendInterface> base_extractor);
virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs); kaldi::FrameExtractionOptions& GetFrameOptions() {
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats); return opts_.frame_opts;
// the dim_ is the dim of single frame feature
virtual size_t Dim() const { return dim_; }
virtual void SetFinished() { base_extractor_->SetFinished(); }
virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
virtual void Reset() {
base_extractor_->Reset();
remained_wav_.Resize(0);
} }
private: bool Compute(kaldi::Vector<kaldi::BaseFloat>* window,
bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves, kaldi::Vector<kaldi::BaseFloat>* feat);
kaldi::Vector<kaldi::BaseFloat>* feats);
size_t dim_; int32 Dim() const { return dim_; }
kaldi::FeatureWindowFunction feature_window_funtion_;
kaldi::BaseFloat hanning_window_energy_; bool NeedRawLogEnergy() { return false; }
LinearSpectrogramOptions opts_;
std::unique_ptr<FrontendInterface> base_extractor_; private:
kaldi::Vector<kaldi::BaseFloat> remained_wav_; kaldi::BaseFloat scale_;
int chunk_sample_size_; Options opts_;
DISALLOW_COPY_AND_ASSIGN(LinearSpectrogram); int32 frame_length_;
int32 dim_;
}; };
typedef StreamingFeatureTpl<LinearSpectrogramComputer> LinearSpectrogram;
//DISALLOW_COPY_AND_ASSIGN(LinearSpectrogram);
} // namespace ppspeech } // namespace ppspeech
Loading…
Cancel
Save