Fix typos and make the build succeed

pull/1400/head
SmileGoat 3 years ago
parent c60277515b
commit 42c8d0dd97

@ -22,4 +22,4 @@ add_executable(mfcc-test codelab/feat_test/feature-mfcc-test.cc)
target_link_libraries(mfcc-test kaldi-mfcc)
add_executable(linear_spectrogram_main codelab/feat_test/linear_spectrogram_main.cc)
target_link_libraries(linear_spectrogram_main frontend kaildi-util kaldi-feat)
target_link_libraries(linear_spectrogram_main frontend kaldi-util kaldi-feat-common gflags glog)

@ -14,4 +14,4 @@
#pragma once
#include "fst/flags.h"
#include "gflags/gflags.h"

@ -12,7 +12,7 @@ DEFINE_string(wav_rspecifier, "", "test wav path");
DEFINE_string(feature_wspecifier, "", "test wav ark");
int main(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, false);
gflags::ParseCommandLineFlags(&argc, &argv, false);
google::InitGoogleLogging(argv[0]);
kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(FLAGS_wav_rspecifier);
@ -22,9 +22,9 @@ int main(int argc, char* argv[]) {
int32 num_done = 0, num_err = 0;
ppspeech::LinearSpectrogramOptions opt;
ppspeech::DecibelNormalizerOptions db_norm_opt;
std::unique_ptr<ppspeech::FeatureExtractorInterface> base_feature_extractor =
new ppspeech::DecibelNormalizer(db_norm_opt);
ppspeech::LinearSpectrogram linear_spectrogram(opt, base_featrue_extractor);
std::unique_ptr<ppspeech::FeatureExtractorInterface> base_feature_extractor(
new ppspeech::DecibelNormalizer(db_norm_opt));
ppspeech::LinearSpectrogram linear_spectrogram(opt, std::move(base_feature_extractor));
for (; !wav_reader.Done(); wav_reader.Next()) {
std::string utt = wav_reader.Key();

@ -21,11 +21,12 @@ namespace ppspeech {
using kaldi::int32;
using kaldi::BaseFloat;
using kaldi::Vector;
using kaldi::VectorBase;
using kaldi::Matrix;
using std::vector;
//todo remove later
void CopyVector2StdVector(const kaldi::Vector<BaseFloat>& input,
void CopyVector2StdVector_(const VectorBase<BaseFloat>& input,
vector<BaseFloat>* output) {
if (input.Dim() == 0) return;
output->resize(input.Dim());
@ -34,7 +35,7 @@ void CopyVector2StdVector(const kaldi::Vector<BaseFloat>& input,
}
}
void CopyStdVector2Vector(const vector<BaseFloat>& input,
void CopyStdVector2Vector_(const vector<BaseFloat>& input,
Vector<BaseFloat>* output) {
if (input.empty()) return;
output->Resize(input.size());
@ -62,7 +63,7 @@ LinearSpectrogram::LinearSpectrogram(
dim_ = fft_points_ / 2 + 1; // the dimension is Fs/2 Hz
}
void LinearSpectrogram::AcceptWavefrom(const kaldi::VectorBase<BaseFloat>& input) {
void LinearSpectrogram::AcceptWaveform(const VectorBase<BaseFloat>& input) {
base_extractor_->AcceptWaveform(input);
}
@ -78,9 +79,9 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
vector<BaseFloat>* real,
vector<BaseFloat>* img) const {
Vector<BaseFloat> v_tmp;
CopyStdVector2Vector(*v, &v_tmp);
CopyStdVector2Vector_(*v, &v_tmp);
RealFft(&v_tmp, true);
CopyVector2StdVector(v_tmp, v);
CopyVector2StdVector_(v_tmp, v);
real->push_back(v->at(0));
img->push_back(0);
for (int i = 1; i < v->size() / 2; i++) {
@ -96,10 +97,11 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
// todo remove later
void LinearSpectrogram::ReadFeats(Matrix<BaseFloat>* feats) {
Vector<BaseFloat> tmp;
waveform_.Resize(base_extractor_->Dim());
Compute(tmp, &waveform_);
vector<vector<BaseFloat>> result;
vector<BaseFloat> feats_vec;
CopyVector2StdVector(waveform_, &feats_vec);
CopyVector2StdVector_(waveform_, &feats_vec);
Compute(feats_vec, result);
feats->Resize(result.size(), result[0].size());
for (int row_idx = 0; row_idx < result.size(); ++row_idx) {
@ -110,10 +112,15 @@ void LinearSpectrogram::ReadFeats(Matrix<BaseFloat>* feats) {
waveform_.Resize(0);
}
// Placeholder for LinearSpectrogram::Read: it is not implemented yet and
// returns immediately without writing anything to `feat`.
void LinearSpectrogram::Read(VectorBase<BaseFloat>* feat) {
// TODO: implement; for now `feat` is left untouched.
return;
}
// only for test, remove later
// todo: compute the feature frame by frame.
void LinearSpectrogram::Compute(const kaldi::Vector<kaldi::BaseFloat>& input,
kaldi::Vector<kaldi::BaseFloat>* feature) {
void LinearSpectrogram::Compute(const VectorBase<kaldi::BaseFloat>& input,
VectorBase<kaldi::BaseFloat>* feature) {
base_extractor_->Read(feature);
}

@ -21,7 +21,7 @@ class LinearSpectrogram : public FeatureExtractorInterface {
public:
explicit LinearSpectrogram(const LinearSpectrogramOptions& opts,
std::unique_ptr<FeatureExtractorInterface> base_extractor);
virtual void AcceptWavefrom(const kaldi::VectorBase<kaldi::BaseFloat>& input);
virtual void AcceptWaveform(const kaldi::VectorBase<kaldi::BaseFloat>& input);
virtual void Read(kaldi::VectorBase<kaldi::BaseFloat>* feat);
virtual size_t Dim() const { return dim_; }
void ReadFeats(kaldi::Matrix<kaldi::BaseFloat>* feats);
@ -30,8 +30,8 @@ class LinearSpectrogram : public FeatureExtractorInterface {
void Hanning(std::vector<kaldi::BaseFloat>* data) const;
bool Compute(const std::vector<kaldi::BaseFloat>& wave,
std::vector<std::vector<kaldi::BaseFloat>>& feat);
void Compute(const kaldi::Vector<kaldi::BaseFloat>& input,
kaldi::Vector<kaldi::BaseFloat>* feature);
void Compute(const kaldi::VectorBase<kaldi::BaseFloat>& input,
kaldi::VectorBase<kaldi::BaseFloat>* feature);
bool NumpyFft(std::vector<kaldi::BaseFloat>* v,
std::vector<kaldi::BaseFloat>* real,
std::vector<kaldi::BaseFloat>* img) const;

@ -4,24 +4,28 @@
namespace ppspeech {
using kaldi::Vector;
using kaldi::VectorBase;
using kaldi::BaseFloat;
using std::vector;
// Constructs a DecibelNormalizer: stores a copy of `opts` in opts_ and
// initializes the cached dimension dim_ to 0.
DecibelNormalizer::DecibelNormalizer(const DecibelNormalizerOptions& opts) {
opts_ = opts;
dim_ = 0;
}
void DecibelNormalizer::AcceptWavefrom(const Vector<BaseFloat>& input) {
waveform_ = input;
void DecibelNormalizer::AcceptWaveform(const kaldi::VectorBase<BaseFloat>& input) {
dim_ = input.Dim();
waveform_.Resize(input.Dim());
waveform_.CopyFromVec(input);
}
void DecibelNormalizer::Read(Vector<BaseFloat>* feat) {
void DecibelNormalizer::Read(kaldi::VectorBase<BaseFloat>* feat) {
if (waveform_.Dim() == 0) return;
Compute(waveform_, feat);
}
//todo remove later
void CopyVector2StdVector(const kaldi::Vector<BaseFloat>& input,
void CopyVector2StdVector(const kaldi::VectorBase<BaseFloat>& input,
vector<BaseFloat>* output) {
if (input.Dim() == 0) return;
output->resize(input.Dim());
@ -31,16 +35,16 @@ void CopyVector2StdVector(const kaldi::Vector<BaseFloat>& input,
}
void CopyStdVector2Vector(const vector<BaseFloat>& input,
Vector<BaseFloat>* output) {
VectorBase<BaseFloat>* output) {
if (input.empty()) return;
output->Resize(input.size());
assert(input.size() == output->Dim());
for (size_t idx = 0; idx < input.size(); ++idx) {
(*output)(idx) = input[idx];
}
}
bool DecibelNormalizer::Compute(const Vector<BaseFloat>& input,
Vector<BaseFloat>* feat) const {
bool DecibelNormalizer::Compute(const VectorBase<BaseFloat>& input,
VectorBase<BaseFloat>* feat) const {
// calculate db rms
BaseFloat rms_db = 0.0;
BaseFloat mean_square = 0.0;

@ -27,13 +27,14 @@ struct DecibelNormalizerOptions {
class DecibelNormalizer : public FeatureExtractorInterface {
public:
explicit DecibelNormalizer(const DecibelNormalizerOptions& opts);
virtual void AcceptWavefrom(const kaldi::VectorBase<kaldi::BaseFloat>& input);
virtual void AcceptWaveform(const kaldi::VectorBase<kaldi::BaseFloat>& input);
virtual void Read(kaldi::VectorBase<kaldi::BaseFloat>* feat);
virtual size_t Dim() const { return 0; }
bool Compute(const kaldi::Vector<kaldi::BaseFloat>& input,
kaldi::Vector<kaldi::BaseFloat>* feat) const;
bool Compute(const kaldi::VectorBase<kaldi::BaseFloat>& input,
kaldi::VectorBase<kaldi::BaseFloat>* feat) const;
private:
DecibelNormalizerOptions opts_;
size_t dim_;
std::unique_ptr<FeatureExtractorInterface> base_extractor_;
kaldi::Vector<kaldi::BaseFloat> waveform_;
};

Loading…
Cancel
Save