From 42c8d0dd97cfc1437fb48fa4f50833022ff31d57 Mon Sep 17 00:00:00 2001 From: SmileGoat Date: Sun, 13 Feb 2022 17:59:48 +0800 Subject: [PATCH] fix typo & make build success --- speechx/speechx/CMakeLists.txt | 2 +- speechx/speechx/base/flags.h | 2 +- .../feat_test/linear_spectrogram_main.cc | 8 +++---- .../speechx/frontend/linear_spectrogram.cc | 23 ++++++++++++------- speechx/speechx/frontend/linear_spectrogram.h | 6 ++--- speechx/speechx/frontend/normalizer.cc | 20 +++++++++------- speechx/speechx/frontend/normalizer.h | 7 +++--- 7 files changed, 40 insertions(+), 28 deletions(-) diff --git a/speechx/speechx/CMakeLists.txt b/speechx/speechx/CMakeLists.txt index d05c7034..25e7b1e3 100644 --- a/speechx/speechx/CMakeLists.txt +++ b/speechx/speechx/CMakeLists.txt @@ -22,4 +22,4 @@ add_executable(mfcc-test codelab/feat_test/feature-mfcc-test.cc) target_link_libraries(mfcc-test kaldi-mfcc) add_executable(linear_spectrogram_main codelab/feat_test/linear_spectrogram_main.cc) -target_link_libraries(linear_spectrogram_main frontend kaildi-util kaldi-feat) +target_link_libraries(linear_spectrogram_main frontend kaldi-util kaldi-feat-common gflags glog) diff --git a/speechx/speechx/base/flags.h b/speechx/speechx/base/flags.h index 41df0d45..2808fac3 100644 --- a/speechx/speechx/base/flags.h +++ b/speechx/speechx/base/flags.h @@ -14,4 +14,4 @@ #pragma once -#include "fst/flags.h" +#include "gflags/gflags.h" diff --git a/speechx/speechx/codelab/feat_test/linear_spectrogram_main.cc b/speechx/speechx/codelab/feat_test/linear_spectrogram_main.cc index de6d42ec..00162abe 100644 --- a/speechx/speechx/codelab/feat_test/linear_spectrogram_main.cc +++ b/speechx/speechx/codelab/feat_test/linear_spectrogram_main.cc @@ -12,7 +12,7 @@ DEFINE_string(wav_rspecifier, "", "test wav path"); DEFINE_string(feature_wspecifier, "", "test wav ark"); int main(int argc, char* argv[]) { - google::ParseCommandLineFlags(&argc, &argv, false); + gflags::ParseCommandLineFlags(&argc, &argv, false); google::InitGoogleLogging(argv[0]); kaldi::SequentialTableReader wav_reader(FLAGS_wav_rspecifier); @@ -22,9 +22,9 @@ int main(int argc, char* argv[]) { int32 num_done = 0, num_err = 0; ppspeech::LinearSpectrogramOptions opt; ppspeech::DecibelNormalizerOptions db_norm_opt; - std::unique_ptr base_feature_extractor = - new ppspeech::DecibelNormalizer(db_norm_opt); - ppspeech::LinearSpectrogram linear_spectrogram(opt, base_featrue_extractor); + std::unique_ptr base_feature_extractor( + new ppspeech::DecibelNormalizer(db_norm_opt)); + ppspeech::LinearSpectrogram linear_spectrogram(opt, std::move(base_feature_extractor)); for (; !wav_reader.Done(); wav_reader.Next()) { std::string utt = wav_reader.Key(); diff --git a/speechx/speechx/frontend/linear_spectrogram.cc b/speechx/speechx/frontend/linear_spectrogram.cc index a1d72599..a23b4494 100644 --- a/speechx/speechx/frontend/linear_spectrogram.cc +++ b/speechx/speechx/frontend/linear_spectrogram.cc @@ -21,11 +21,12 @@ namespace ppspeech { using kaldi::int32; using kaldi::BaseFloat; using kaldi::Vector; +using kaldi::VectorBase; using kaldi::Matrix; using std::vector; //todo remove later -void CopyVector2StdVector(const kaldi::Vector& input, +void CopyVector2StdVector_(const VectorBase& input, vector* output) { if (input.Dim() == 0) return; output->resize(input.Dim()); @@ -34,7 +35,7 @@ void CopyVector2StdVector(const kaldi::Vector& input, } } -void CopyStdVector2Vector(const vector& input, +void CopyStdVector2Vector_(const vector& input, Vector* output) { if (input.empty()) return; output->Resize(input.size()); @@ -62,7 +63,7 @@ LinearSpectrogram::LinearSpectrogram( dim_ = fft_points_ / 2 + 1; // the dimension is Fs/2 Hz } -void LinearSpectrogram::AcceptWavefrom(const kaldi::VectorBase& input) { +void LinearSpectrogram::AcceptWaveform(const VectorBase& input) { base_extractor_->AcceptWaveform(input); } @@ -78,9 +79,9 @@ bool LinearSpectrogram::NumpyFft(vector* v, vector* real, vector* img) const { Vector v_tmp; - CopyStdVector2Vector(*v, &v_tmp); + CopyStdVector2Vector_(*v, &v_tmp); RealFft(&v_tmp, true); - CopyVector2StdVector(v_tmp, v); + CopyVector2StdVector_(v_tmp, v); real->push_back(v->at(0)); img->push_back(0); for (int i = 1; i < v->size() / 2; i++) { @@ -96,10 +97,11 @@ bool LinearSpectrogram::NumpyFft(vector* v, // todo remove later void LinearSpectrogram::ReadFeats(Matrix* feats) { Vector tmp; + waveform_.Resize(base_extractor_->Dim()); Compute(tmp, &waveform_); vector> result; vector feats_vec; - CopyVector2StdVector(waveform_, &feats_vec); + CopyVector2StdVector_(waveform_, &feats_vec); Compute(feats_vec, result); feats->Resize(result.size(), result[0].size()); for (int row_idx = 0; row_idx < result.size(); ++row_idx) { @@ -110,10 +112,15 @@ void LinearSpectrogram::ReadFeats(Matrix* feats) { waveform_.Resize(0); } +void LinearSpectrogram::Read(VectorBase* feat) { + // todo + return; +} + // only for test, remove later // todo: compute the feature frame by frame. -void LinearSpectrogram::Compute(const kaldi::Vector& input, - kaldi::Vector* feature) { +void LinearSpectrogram::Compute(const VectorBase& input, + VectorBase* feature) { base_extractor_->Read(feature); } diff --git a/speechx/speechx/frontend/linear_spectrogram.h b/speechx/speechx/frontend/linear_spectrogram.h index 3e541f7f..0923acee 100644 --- a/speechx/speechx/frontend/linear_spectrogram.h +++ b/speechx/speechx/frontend/linear_spectrogram.h @@ -21,7 +21,7 @@ class LinearSpectrogram : public FeatureExtractorInterface { public: explicit LinearSpectrogram(const LinearSpectrogramOptions& opts, std::unique_ptr base_extractor); - virtual void AcceptWavefrom(const kaldi::VectorBase& input); + virtual void AcceptWaveform(const kaldi::VectorBase& input); virtual void Read(kaldi::VectorBase* feat); virtual size_t Dim() const { return dim_; } void ReadFeats(kaldi::Matrix* feats); @@ -30,8 +30,8 @@ class LinearSpectrogram : public FeatureExtractorInterface { void Hanning(std::vector* data) const; bool Compute(const std::vector& wave, std::vector>& feat); - void Compute(const kaldi::Vector& input, - kaldi::Vector* feature); + void Compute(const kaldi::VectorBase& input, + kaldi::VectorBase* feature); bool NumpyFft(std::vector* v, std::vector* real, std::vector* img) const; diff --git a/speechx/speechx/frontend/normalizer.cc b/speechx/speechx/frontend/normalizer.cc index dde4a98a..04e88bf4 100644 --- a/speechx/speechx/frontend/normalizer.cc +++ b/speechx/speechx/frontend/normalizer.cc @@ -4,24 +4,28 @@ namespace ppspeech { using kaldi::Vector; +using kaldi::VectorBase; using kaldi::BaseFloat; using std::vector; DecibelNormalizer::DecibelNormalizer(const DecibelNormalizerOptions& opts) { opts_ = opts; + dim_ = 0; } -void DecibelNormalizer::AcceptWavefrom(const Vector& input) { - waveform_ = input; +void DecibelNormalizer::AcceptWaveform(const kaldi::VectorBase& input) { + dim_ = input.Dim(); + waveform_.Resize(input.Dim()); + waveform_.CopyFromVec(input); } -void DecibelNormalizer::Read(Vector* feat) { +void DecibelNormalizer::Read(kaldi::VectorBase* feat) { if (waveform_.Dim() == 0) return; Compute(waveform_, feat); } //todo remove later -void CopyVector2StdVector(const kaldi::Vector& input, +void CopyVector2StdVector(const kaldi::VectorBase& input, vector* output) { if (input.Dim() == 0) return; output->resize(input.Dim()); @@ -31,16 +35,16 @@ void CopyVector2StdVector(const kaldi::Vector& input, } void CopyStdVector2Vector(const vector& input, - Vector* output) { + VectorBase* output) { if (input.empty()) return; - output->Resize(input.size()); + assert(input.size() == output->Dim()); for (size_t idx = 0; idx < input.size(); ++idx) { (*output)(idx) = input[idx]; } } -bool DecibelNormalizer::Compute(const Vector& input, - Vector* feat) const { +bool DecibelNormalizer::Compute(const VectorBase& input, + VectorBase* feat) const { // calculate db rms BaseFloat rms_db = 0.0; BaseFloat mean_square = 0.0; diff --git a/speechx/speechx/frontend/normalizer.h b/speechx/speechx/frontend/normalizer.h index 4e914456..3bf36cfc 100644 --- a/speechx/speechx/frontend/normalizer.h +++ b/speechx/speechx/frontend/normalizer.h @@ -27,13 +27,14 @@ struct DecibelNormalizerOptions { class DecibelNormalizer : public FeatureExtractorInterface { public: explicit DecibelNormalizer(const DecibelNormalizerOptions& opts); - virtual void AcceptWavefrom(const kaldi::VectorBase& input); + virtual void AcceptWaveform(const kaldi::VectorBase& input); virtual void Read(kaldi::VectorBase* feat); virtual size_t Dim() const { return 0; } - bool Compute(const kaldi::Vector& input, - kaldi::Vector* feat) const; + bool Compute(const kaldi::VectorBase& input, + kaldi::VectorBase* feat) const; private: DecibelNormalizerOptions opts_; + size_t dim_; std::unique_ptr base_extractor_; kaldi::Vector waveform_; };