add frontend cmakelist

pull/1400/head
SmileGoat 3 years ago
parent f03d48f79b
commit c60277515b

@ -35,3 +35,7 @@ We borrowed a lot of code from these repos to build `model` and `engine`, thanks
* [librosa](https://github.com/librosa/librosa/blob/main/LICENSE.md) * [librosa](https://github.com/librosa/librosa/blob/main/LICENSE.md)
- ISC License - ISC License
- Audio feature - Audio feature
* [ThreadPool](https://github.com/progschj/ThreadPool/blob/master/COPYING)
- zlib License
- ThreadPool

@ -65,7 +65,7 @@ FetchContent_Declare(
URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc
) )
FetchContent_MakeAvailable(glog) FetchContent_MakeAvailable(glog)
include_directories(${glog_BINARY_DIR}) include_directories(${glog_BINARY_DIR} ${glog_SOURCE_DIR}/src)
# gtest # gtest
FetchContent_Declare(googletest FetchContent_Declare(googletest

@ -4,11 +4,22 @@ project(speechx LANGUAGES CXX)
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/openblas) link_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/openblas)
# Build with ISO C++14. The CMAKE_CXX_STANDARD variables replace the raw
# "-std=c++14" that used to be appended to CMAKE_CXX_FLAGS: they let CMake
# pick the right switch for the active compiler and leave user-supplied
# CMAKE_CXX_FLAGS untouched. EXTENSIONS is OFF so the compiler is asked for
# plain c++14 (not gnu++14), matching the previous flag.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
include_directories( include_directories(
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/kaldi ${CMAKE_CURRENT_SOURCE_DIR}/kaldi
) )
add_subdirectory(kaldi) add_subdirectory(kaldi)
# Put the project root and frontend/ on the directory-wide include path,
# then descend into frontend/ (which inherits that path) to configure the
# frontend library.
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/frontend)
add_subdirectory(frontend)
add_executable(mfcc-test codelab/feat_test/feature-mfcc-test.cc) add_executable(mfcc-test codelab/feat_test/feature-mfcc-test.cc)
target_link_libraries(mfcc-test kaldi-mfcc) target_link_libraries(mfcc-test kaldi-mfcc)
# Executable built from codelab/feat_test/linear_spectrogram_main.cc.
add_executable(linear_spectrogram_main codelab/feat_test/linear_spectrogram_main.cc)
# The second dependency was misspelled "kaildi-util". A name that matches no
# CMake target is handed to the linker as a bare library name, so the typo
# only surfaced as a link-time "library not found" error.
# NOTE(review): kaldi-util and kaldi-feat are presumably targets defined under
# kaldi/ -- confirm the exact target names there.
target_link_libraries(linear_spectrogram_main PRIVATE frontend kaldi-util kaldi-feat)

@ -16,7 +16,7 @@
#include "kaldi/base/kaldi-types.h" #include "kaldi/base/kaldi-types.h"
#include <limits.h> #include <limits>
typedef float BaseFloat; typedef float BaseFloat;
typedef double double64; typedef double double64;
@ -35,7 +35,7 @@ typedef unsigned char uint8;
typedef unsigned short uint16; typedef unsigned short uint16;
typedef unsigned int uint32; typedef unsigned int uint32;
if defined(__LP64__) && !defined(OS_MACOSX) && !defined(OS_OPENBSD) #if defined(__LP64__) && !defined(OS_MACOSX) && !defined(OS_OPENBSD)
typedef unsigned long uint64; typedef unsigned long uint64;
#else #else
typedef unsigned long long uint64; typedef unsigned long long uint64;

@ -1,3 +1,23 @@
// Copyright (c) 2012 Jakob Progsch, Václav Zeman
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgment in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source
// distribution.
// this code is from https://github.com/progschj/ThreadPool // this code is from https://github.com/progschj/ThreadPool
#ifndef BASE_THREAD_POOL_H #ifndef BASE_THREAD_POOL_H
@ -97,4 +117,4 @@ inline ThreadPool::~ThreadPool()
worker.join(); worker.join();
} }
#endif #endif

@ -2,6 +2,7 @@
#include "frontend/linear_spectrogram.h" #include "frontend/linear_spectrogram.h"
#include "frontend/normalizer.h" #include "frontend/normalizer.h"
#include "frontend/feature_extractor_interface.h"
#include "kaldi/util/table-types.h" #include "kaldi/util/table-types.h"
#include "base/log.h" #include "base/log.h"
#include "base/flags.h" #include "base/flags.h"
@ -22,7 +23,7 @@ int main(int argc, char* argv[]) {
ppspeech::LinearSpectrogramOptions opt; ppspeech::LinearSpectrogramOptions opt;
ppspeech::DecibelNormalizerOptions db_norm_opt; ppspeech::DecibelNormalizerOptions db_norm_opt;
std::unique_ptr<ppspeech::FeatureExtractorInterface> base_feature_extractor = std::unique_ptr<ppspeech::FeatureExtractorInterface> base_feature_extractor =
new DecibelNormalizer(db_norm_opt); new ppspeech::DecibelNormalizer(db_norm_opt);
ppspeech::LinearSpectrogram linear_spectrogram(opt, base_featrue_extractor); ppspeech::LinearSpectrogram linear_spectrogram(opt, base_featrue_extractor);
for (; !wav_reader.Done(); wav_reader.Next()) { for (; !wav_reader.Done(); wav_reader.Next()) {

@ -0,0 +1,8 @@
# Frontend library, compiled from normalizer.cc and linear_spectrogram.cc.
project(frontend)

add_library(frontend
  normalizer.cc
  linear_spectrogram.cc
)

# PUBLIC: the frontend headers include kaldi matrix headers
# (feature_extractor_interface.h pulls in kaldi/matrix/kaldi-vector.h), so
# anything that links against frontend needs kaldi-matrix as well. This keeps
# the transitive linking that the keyword-less signature provided, while
# stating the visibility explicitly.
target_link_libraries(frontend PUBLIC kaldi-matrix)

@ -15,16 +15,14 @@
#pragma once #pragma once
#include "base/basic_types.h" #include "base/basic_types.h"
#incldue "kaldi/matrix/kaldi-vector.h" #include "kaldi/matrix/kaldi-vector.h"
namespace ppspeech { namespace ppspeech {
class FeatureExtractorInterface { class FeatureExtractorInterface {
public: public:
virtual void AcceptWaveform(const kaldi::Vector<kaldi::BaseFloat>& input) = 0; virtual void AcceptWaveform(const kaldi::VectorBase<kaldi::BaseFloat>& input) = 0;
virtual void Read(kaldi::Vector<kaldi::BaseFloat>* feat) = 0; virtual void Read(kaldi::VectorBase<kaldi::BaseFloat>* feat) = 0;
virtual void Compute(const kaldi::VectorBase<kaldi::BaseFloat>& input,
kaldi::VectorBae<kaldi::BaseFloat>* feature) = 0;
virtual size_t Dim() const = 0; virtual size_t Dim() const = 0;
}; };

@ -16,15 +16,36 @@
#include "kaldi/base/kaldi-math.h" #include "kaldi/base/kaldi-math.h"
#include "kaldi/matrix/matrix-functions.h" #include "kaldi/matrix/matrix-functions.h"
namespace ppspeech {
using kaldi::int32; using kaldi::int32;
using kaldi::BaseFloat; using kaldi::BaseFloat;
using kaldi::Vector; using kaldi::Vector;
using kaldi::Matrix; using kaldi::Matrix;
using std::vector; using std::vector;
// TODO: remove later.
// Copies every element of the kaldi vector `input` into `*output`.
// `*output` is always resized to input.Dim(), so an empty input produces an
// empty output instead of leaving stale elements from a previous call.
//
// `static` gives the helper internal linkage: normalizer.cc defines a
// function with the same name and signature in namespace ppspeech, and two
// external definitions inside the same library collide at link time.
static void CopyVector2StdVector(const kaldi::Vector<BaseFloat>& input,
                                 vector<BaseFloat>* output) {
    // Dim() is a signed kaldi index; keep the loop index signed to match it.
    const int32 dim = input.Dim();
    output->resize(dim);
    for (int32 idx = 0; idx < dim; ++idx) {
        (*output)[idx] = input(idx);
    }
}
void CopyStdVector2Vector(const vector<BaseFloat>& input,
Vector<BaseFloat>* output) {
if (input.empty()) return;
output->Resize(input.size());
for (size_t idx = 0; idx < input.size(); ++idx) {
(*output)(idx) = input[idx];
}
}
LinearSpectrogram::LinearSpectrogram( LinearSpectrogram::LinearSpectrogram(
const LinearSpectrogramOptions& opts, const LinearSpectrogramOptions& opts,
const std::unique_ptr<FeatureExtractorInterface> base_extractor) { std::unique_ptr<FeatureExtractorInterface> base_extractor) {
base_extractor_ = std::move(base_extractor); base_extractor_ = std::move(base_extractor);
int32 window_size = opts.frame_opts.WindowSize(); int32 window_size = opts.frame_opts.WindowSize();
int32 window_shift = opts.frame_opts.WindowShift(); int32 window_shift = opts.frame_opts.WindowShift();
@ -41,11 +62,8 @@ LinearSpectrogram::LinearSpectrogram(
dim_ = fft_points_ / 2 + 1; // the dimension is Fs/2 Hz dim_ = fft_points_ / 2 + 1; // the dimension is Fs/2 Hz
} }
void LinearSpectrogram::AcceptWavefrom(const Vector<BaseFloat>& input) { void LinearSpectrogram::AcceptWavefrom(const kaldi::VectorBase<BaseFloat>& input) {
wavefrom_.resize(input.Dim()); base_extractor_->AcceptWaveform(input);
for (size_t idx = 0; idx < input.Dim(); ++idx) {
waveform_[idx] = input(idx);
}
} }
void LinearSpectrogram::Hanning(vector<float>* data) const { void LinearSpectrogram::Hanning(vector<float>* data) const {
@ -58,11 +76,11 @@ void LinearSpectrogram::Hanning(vector<float>* data) const {
bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v, bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
vector<BaseFloat>* real, vector<BaseFloat>* real,
vector<BaseFloat>* img) { vector<BaseFloat>* img) const {
if (RealFft(v, true)) { Vector<BaseFloat> v_tmp;
LOG(ERROR) << "compute the fft occurs error"; CopyStdVector2Vector(*v, &v_tmp);
return false; RealFft(&v_tmp, true);
} CopyVector2StdVector(v_tmp, v);
real->push_back(v->at(0)); real->push_back(v->at(0));
img->push_back(0); img->push_back(0);
for (int i = 1; i < v->size() / 2; i++) { for (int i = 1; i < v->size() / 2; i++) {
@ -75,36 +93,28 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
return true; return true;
} }
//todo remove later
void CopyVector2StdVector(const kaldi::Vector<BaseFloat>& input,
vector<BaseFloat>* output) {
}
// todo remove later // todo remove later
bool LinearSpectrogram::ReadFeats(Matrix<BaseFloat>* feats) const { void LinearSpectrogram::ReadFeats(Matrix<BaseFloat>* feats) {
if (wavefrom_.Dim() == 0) { Vector<BaseFloat> tmp;
return false; Compute(tmp, &waveform_);
}
kaldi::Vector<BaseFloat> feats;
Compute(wavefrom_, &feats);
vector<vector<BaseFloat>> result; vector<vector<BaseFloat>> result;
vector<BaseFloat> feats_vec; vector<BaseFloat> feats_vec;
CopyVector2StdVector(feats, &feats_vec); CopyVector2StdVector(waveform_, &feats_vec);
Compute(feats_vec, result); Compute(feats_vec, result);
feats->Resize(result.size(), result[0].size()); feats->Resize(result.size(), result[0].size());
for (int row_idx = 0; row_idx < result.size(); ++row_idx) { for (int row_idx = 0; row_idx < result.size(); ++row_idx) {
for (int col_idx = 0; col_idx < result.size(); ++col_idx) { for (int col_idx = 0; col_idx < result.size(); ++col_idx) {
feats(row_idx, col_idx) = result[row_idx][col_idx]; (*feats)(row_idx, col_idx) = result[row_idx][col_idx];
}
} }
wavefrom_.Resize(0); waveform_.Resize(0);
return true;
} }
// only for test, remove later // only for test, remove later
// todo: compute the feature frame by frame. // todo: compute the feature frame by frame.
void LinearSpectrogram::Compute(const kaldi::VectorBase<kaldi::BaseFloat>& input, void LinearSpectrogram::Compute(const kaldi::Vector<kaldi::BaseFloat>& input,
kaldi::VectorBae<kaldi::BaseFloat>* feature) { kaldi::Vector<kaldi::BaseFloat>* feature) {
base_extractor_->Compute(input, feature); base_extractor_->Read(feature);
} }
// Compute spectrogram feat, only for test, remove later // Compute spectrogram feat, only for test, remove later
@ -112,9 +122,9 @@ void LinearSpectrogram::Compute(const kaldi::VectorBase<kaldi::BaseFloat>& input
bool LinearSpectrogram::Compute(const vector<float>& wave, bool LinearSpectrogram::Compute(const vector<float>& wave,
vector<vector<float>>& feat) { vector<vector<float>>& feat) {
int num_samples = wave.size(); int num_samples = wave.size();
const int& frame_length = opts.frame_opts.WindowSize(); const int& frame_length = opts_.frame_opts.WindowSize();
const int& sample_rate = opts.frame_opts.samp_freq; const int& sample_rate = opts_.frame_opts.samp_freq;
const int& frame_shift = opts.frame_opts.WindowShift(); const int& frame_shift = opts_.frame_opts.WindowShift();
const int& fft_points = fft_points_; const int& fft_points = fft_points_;
const float scale = hanning_window_energy_ * frame_shift; const float scale = hanning_window_energy_ * frame_shift;
@ -132,11 +142,11 @@ bool LinearSpectrogram::Compute(const vector<float>& wave,
for (int i = 0; i < num_frames; ++i) { for (int i = 0; i < num_frames; ++i) {
vector<float> data(wave.data() + i * frame_shift, vector<float> data(wave.data() + i * frame_shift,
wave.data() + i * frame_shift + frame_length); wave.data() + i * frame_shift + frame_length);
Hanning(data); Hanning(&data);
fft_img.clear(); fft_img.clear();
fft_real.clear(); fft_real.clear();
v.assign(data.begin(), data.end()); v.assign(data.begin(), data.end());
if (NumpyFft(&v, fft_real, fft_img)) { if (NumpyFft(&v, &fft_real, &fft_img)) {
LOG(ERROR)<< i << " fft compute occurs error, please checkout the input data"; LOG(ERROR)<< i << " fft compute occurs error, please checkout the input data";
return false; return false;
} }
@ -155,5 +165,8 @@ bool LinearSpectrogram::Compute(const vector<float>& wave,
// log added eps=1e-14 // log added eps=1e-14
feat[i][j] = std::log(feat[i][j] + 1e-14); feat[i][j] = std::log(feat[i][j] + 1e-14);
} }
}
return true; return true;
} }
} // namespace ppspeech

@ -8,7 +8,7 @@
namespace ppspeech { namespace ppspeech {
struct LinearSpectrogramOptions { struct LinearSpectrogramOptions {
kaldi::FrameExtrationOptions frame_opts; kaldi::FrameExtractionOptions frame_opts;
LinearSpectrogramOptions(): LinearSpectrogramOptions():
frame_opts() {} frame_opts() {}
@ -19,19 +19,19 @@ struct LinearSpectrogramOptions {
class LinearSpectrogram : public FeatureExtractorInterface { class LinearSpectrogram : public FeatureExtractorInterface {
public: public:
explict LinearSpectrogram(const LinearSpectrogramOptions& opts, explicit LinearSpectrogram(const LinearSpectrogramOptions& opts,
const std::unique_ptr<FeatureExtractorInterface> base_extractor); std::unique_ptr<FeatureExtractorInterface> base_extractor);
virtual void AcceptWavefrom(const kaldi::Vector<kaldi::BaseFloat>& input); virtual void AcceptWavefrom(const kaldi::VectorBase<kaldi::BaseFloat>& input);
virtual void Read(kaldi::Vector<kaldi::BaseFloat>* feat); virtual void Read(kaldi::VectorBase<kaldi::BaseFloat>* feat);
virtual size_t Dim() const { return dim_; } virtual size_t Dim() const { return dim_; }
void ReadFeats(kaldi::Matrix<kaldi::BaesFloat>* feats) const; void ReadFeats(kaldi::Matrix<kaldi::BaseFloat>* feats);
private: private:
void Hanning(std::vector<kaldi::BaseFloat>& data) const; void Hanning(std::vector<kaldi::BaseFloat>* data) const;
kaldi::int32 Compute(const std::vector<kaldi::BaseFloat>& wave, bool Compute(const std::vector<kaldi::BaseFloat>& wave,
std::vector<std::vector<kaldi::BaseFloat>>& feat); std::vector<std::vector<kaldi::BaseFloat>>& feat);
void Compute(const kaldi::VectorBase<kaldi::BaseFloat>& input, void Compute(const kaldi::Vector<kaldi::BaseFloat>& input,
kaldi::VectorBae<kaldi::BaseFloat>* feature); kaldi::Vector<kaldi::BaseFloat>* feature);
bool NumpyFft(std::vector<kaldi::BaseFloat>* v, bool NumpyFft(std::vector<kaldi::BaseFloat>* v,
std::vector<kaldi::BaseFloat>* real, std::vector<kaldi::BaseFloat>* real,
std::vector<kaldi::BaseFloat>* img) const; std::vector<kaldi::BaseFloat>* img) const;
@ -41,7 +41,7 @@ class LinearSpectrogram : public FeatureExtractorInterface {
std::vector<kaldi::BaseFloat> hanning_window_; std::vector<kaldi::BaseFloat> hanning_window_;
kaldi::BaseFloat hanning_window_energy_; kaldi::BaseFloat hanning_window_energy_;
LinearSpectrogramOptions opts_; LinearSpectrogramOptions opts_;
kaldi::Vector<kaldi::BaseFloat> wavefrom_; // remove later, todo(SmileGoat) kaldi::Vector<kaldi::BaseFloat> waveform_; // remove later, todo(SmileGoat)
std::unique_ptr<FeatureExtractorInterface> base_extractor_; std::unique_ptr<FeatureExtractorInterface> base_extractor_;
DISALLOW_COPY_AND_ASSIGN(LinearSpectrogram); DISALLOW_COPY_AND_ASSIGN(LinearSpectrogram);
}; };

@ -1,35 +1,62 @@
#include "frontend/normalizer.h" #include "frontend/normalizer.h"
DecibelNormalizer::DecibelNormalizer( namespace ppspeech {
const DecibelNormalizerOptions& opts) {
using kaldi::Vector;
using kaldi::BaseFloat;
using std::vector;
DecibelNormalizer::DecibelNormalizer(const DecibelNormalizerOptions& opts) {
opts_ = opts;
} }
void DecibelNormalizer::AcceptWavefrom(const kaldi::Vector<kaldi::BaseFloat>& input) { void DecibelNormalizer::AcceptWavefrom(const Vector<BaseFloat>& input) {
waveform_ = input;
}
void DecibelNormalizer::Read(Vector<BaseFloat>* feat) {
if (waveform_.Dim() == 0) return;
Compute(waveform_, feat);
} }
void DecibelNormalizer::Read(kaldi::Vector<kaldi::BaseFloat>* feat) { //todo remove later
// Copies the elements of the kaldi vector `input` into `*output`, resizing
// `*output` to match. When `input` is empty, `*output` is left untouched.
void CopyVector2StdVector(const kaldi::Vector<BaseFloat>& input,
                          vector<BaseFloat>* output) {
    if (input.Dim() != 0) {
        output->resize(input.Dim());
        for (size_t i = 0; i < input.Dim(); ++i) {
            (*output)[i] = input(i);
        }
    }
}
void CopyStdVector2Vector(const vector<BaseFloat>& input,
Vector<BaseFloat>* output) {
if (input.empty()) return;
output->Resize(input.size());
for (size_t idx = 0; idx < input.size(); ++idx) {
(*output)(idx) = input[idx];
}
} }
bool DecibelNormalizer::Compute(const Vector<kaldi::BaseFloat>& input, bool DecibelNormalizer::Compute(const Vector<BaseFloat>& input,
kaldi::Vector<kaldi::BaseFloat>* feat) { Vector<BaseFloat>* feat) const {
// calculate db rms // calculate db rms
float rms_db = 0.0; BaseFloat rms_db = 0.0;
float mean_square = 0.0; BaseFloat mean_square = 0.0;
float gain = 0.0; BaseFloat gain = 0.0;
vector<BaseFloat> smaples; BaseFloat wave_float_normlization = 1.0f / (std::pow(2, 16 - 1));
samples.resize(input.Size());
vector<BaseFloat> samples;
samples.resize(input.Dim());
for (int32 i = 0; i < samples.size(); ++i) { for (int32 i = 0; i < samples.size(); ++i) {
samples[i] = input(i); samples[i] = input(i);
} }
// square // square
for (auto &d : samples) { for (auto &d : samples) {
if (_opts.convert_int_float) { if (opts_.convert_int_float) {
d = d * WAVE_FLOAT_NORMALIZATION; d = d * wave_float_normlization;
} }
mean_square += d * d; mean_square += d * d;
} }
@ -37,12 +64,12 @@ bool DecibelNormalizer::Compute(const Vector<kaldi::BaseFloat>& input,
// mean // mean
mean_square /= samples.size(); mean_square /= samples.size();
rms_db = 10 * std::log10(mean_square); rms_db = 10 * std::log10(mean_square);
gain = opts.target_db - rms_db; gain = opts_.target_db - rms_db;
if (gain > opts.max_gain_db) { if (gain > opts_.max_gain_db) {
LOG(ERROR) << "Unable to normalize segment to " << opts.target_db << "dB," LOG(ERROR) << "Unable to normalize segment to " << opts_.target_db << "dB,"
<< "because the the probable gain have exceeds opts.max_gain_db" << "because the the probable gain have exceeds opts_.max_gain_db"
<< opts.max_gain_db << "dB."; << opts_.max_gain_db << "dB.";
return false; return false;
} }
@ -51,27 +78,28 @@ bool DecibelNormalizer::Compute(const Vector<kaldi::BaseFloat>& input,
// python item *= 10.0 ** (gain / 20.0) // python item *= 10.0 ** (gain / 20.0)
item *= std::pow(10.0, gain / 20.0); item *= std::pow(10.0, gain / 20.0);
} }
CopyStdVector2Vector(samples, feat);
return true; return true;
} }
/*
PPNormalizer::PPNormalizer( PPNormalizer::PPNormalizer(
const PPNormalizerOptions& opts, const PPNormalizerOptions& opts,
const std::unique_ptr<FeatureExtractorInterface>& pre_extractor) { const std::unique_ptr<FeatureExtractorInterface>& pre_extractor) {
} }
void PPNormalizer::AcceptWavefrom(const kaldi::Vector<kaldi::BaseFloat>& input) { void PPNormalizer::AcceptWavefrom(const Vector<BaseFloat>& input) {
} }
void PPNormalizer::Read(kaldi::Vector<kaldi::BaseFloat>* feat) { void PPNormalizer::Read(Vector<BaseFloat>* feat) {
} }
bool PPNormalizer::Compute(const Vector<kaldi::BaseFloat>& input, bool PPNormalizer::Compute(const Vector<BaseFloat>& input,
kaldi::Vector<kaldi::BaseFloat>>* feat) { Vector<BaseFloat>>* feat) {
if ((input.Dim() % mean_.Dim()) == 0) { if ((input.Dim() % mean_.Dim()) == 0) {
LOG(ERROR) << "CMVN dimension is wrong!"; LOG(ERROR) << "CMVN dimension is wrong!";
return false; return false;
@ -93,4 +121,6 @@ bool PPNormalizer::Compute(const Vector<kaldi::BaseFloat>& input,
} }
return true; return true;
} }*/
} // namespace ppspeech

@ -1,7 +1,9 @@
#pragma once #pragma once
#include "base/common.h"
#include "frontend/feature_extractor_interface.h" #include "frontend/feature_extractor_interface.h"
#include "kaldi/util/options-itf.h"
namespace ppspeech { namespace ppspeech {
@ -9,6 +11,7 @@ namespace ppspeech {
struct DecibelNormalizerOptions { struct DecibelNormalizerOptions {
float target_db; float target_db;
float max_gain_db; float max_gain_db;
bool convert_int_float;
DecibelNormalizerOptions() : DecibelNormalizerOptions() :
target_db(-20), target_db(-20),
max_gain_db(300.0), max_gain_db(300.0),
@ -23,16 +26,19 @@ struct DecibelNormalizerOptions {
class DecibelNormalizer : public FeatureExtractorInterface { class DecibelNormalizer : public FeatureExtractorInterface {
public: public:
explict DecibelNormalizer(const DecibelNormalizerOptions& opts, explicit DecibelNormalizer(const DecibelNormalizerOptions& opts);
const std::unique_ptr<FeatureExtractorInterface>& pre_extractor); virtual void AcceptWavefrom(const kaldi::VectorBase<kaldi::BaseFloat>& input);
virtual void AcceptWavefrom(const kaldi::Vector<kaldi::BaseFloat>& input); virtual void Read(kaldi::VectorBase<kaldi::BaseFloat>* feat);
virtual void Read(kaldi::Vector<kaldi::BaseFloat>* feat); virtual size_t Dim() const { return 0; }
virtual size_t Dim() const;
bool Compute(const kaldi::Vector<kaldi::BaseFloat>& input, bool Compute(const kaldi::Vector<kaldi::BaseFloat>& input,
kaldi::Vector<kaldi::BaseFloat>>* feat); kaldi::Vector<kaldi::BaseFloat>* feat) const;
private: private:
DecibelNormalizerOptions opts_;
std::unique_ptr<FeatureExtractorInterface> base_extractor_;
kaldi::Vector<kaldi::BaseFloat> waveform_;
}; };
/*
struct NormalizerOptions { struct NormalizerOptions {
std::string mean_std_path; std::string mean_std_path;
NormalizerOptions() : NormalizerOptions() :
@ -61,5 +67,5 @@ class PPNormalizer : public FeatureExtractorInterface {
kaldi::Vector<float> variance_; kaldi::Vector<float> variance_;
NormalizerOptions _opts; NormalizerOptions _opts;
}; };
*/
} // namespace ppspeech } // namespace ppspeech
Loading…
Cancel
Save