parent
88275aff05
commit
a01fa866a4
@ -0,0 +1,97 @@
|
||||
|
||||
#include "frontend/normalizer.h"

#include <cmath>
#include <vector>
|
||||
|
||||
// Constructs a DecibelNormalizer.
//
// Currently a stub: the body is empty, so neither `opts` nor `pre_extractor`
// is stored or otherwise used.
DecibelNormalizer::DecibelNormalizer(
    const DecibelNormalizerOptions& opts,
    const std::unique_ptr<FeatureExtractorInterface>& pre_extractor) {}
|
||||
|
||||
// Currently a no-op: `input` is ignored and no state is changed.
void DecibelNormalizer::AcceptWavefrom(
    const kaldi::Vector<kaldi::BaseFloat>& input) {}
|
||||
|
||||
// Currently a no-op: `feat` is left exactly as the caller passed it.
void DecibelNormalizer::Read(kaldi::Vector<kaldi::BaseFloat>* feat) {}
|
||||
|
||||
bool DecibelNormalizer::Compute(const Vector<kaldi::BaseFloat>& input,
|
||||
kaldi::Vector<kaldi::BaseFloat>* feat) {
|
||||
// calculate db rms
|
||||
float rms_db = 0.0;
|
||||
float mean_square = 0.0;
|
||||
float gain = 0.0;
|
||||
vector<BaseFloat> smaples;
|
||||
samples.resize(input.Size());
|
||||
for (int32 i = 0; i < samples.size(); ++i) {
|
||||
samples[i] = input(i);
|
||||
}
|
||||
|
||||
// square
|
||||
for (auto &d : samples) {
|
||||
if (_opts.convert_int_float) {
|
||||
d = d * WAVE_FLOAT_NORMALIZATION;
|
||||
}
|
||||
mean_square += d * d;
|
||||
}
|
||||
|
||||
// mean
|
||||
mean_square /= samples.size();
|
||||
rms_db = 10 * std::log10(mean_square);
|
||||
gain = opts.target_db - rms_db;
|
||||
|
||||
if (gain > opts.max_gain_db) {
|
||||
LOG(ERROR) << "Unable to normalize segment to " << opts.target_db << "dB,"
|
||||
<< "because the the probable gain have exceeds opts.max_gain_db"
|
||||
<< opts.max_gain_db << "dB.";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Note that this is an in-place transformation.
|
||||
for (auto &item : samples) {
|
||||
// python item *= 10.0 ** (gain / 20.0)
|
||||
item *= std::pow(10.0, gain / 20.0);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// Constructs a PPNormalizer from `opts`.
//
// The options are copied into _opts and _initialized starts out false.
// `pre_extractor` is not used here. mean_ and variance_ are left empty;
// nothing in the visible sources fills them in yet.
PPNormalizer::PPNormalizer(
    const NormalizerOptions& opts,
    const std::unique_ptr<FeatureExtractorInterface>& pre_extractor)
    : _initialized(false), _opts(opts) {}
|
||||
|
||||
// Currently a no-op: `input` is ignored and no state is changed.
void PPNormalizer::AcceptWavefrom(
    const kaldi::Vector<kaldi::BaseFloat>& input) {}
|
||||
|
||||
// Currently a no-op: `feat` is left exactly as the caller passed it.
void PPNormalizer::Read(kaldi::Vector<kaldi::BaseFloat>* feat) {}
|
||||
|
||||
bool PPNormalizer::Compute(const Vector<kaldi::BaseFloat>& input,
|
||||
kaldi::Vector<kaldi::BaseFloat>>* feat) {
|
||||
if ((input.Dim() % mean_.Dim()) == 0) {
|
||||
LOG(ERROR) << "CMVN dimension is wrong!";
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
int32 size = mean_.Dim();
|
||||
feat->Resize(input.Dim());
|
||||
for (int32 row_idx = 0; row_idx < j; ++row_idx) {
|
||||
int32 base_idx = row_idx * size;
|
||||
for (int32 idx = 0; idx < mean_.Dim(); ++idx) {
|
||||
(*feat)(base_idx + idx) = (input(base_dix + idx) - mean_(idx))* variance_(idx);
|
||||
}
|
||||
}
|
||||
|
||||
} catch(const std::exception& e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
@ -0,0 +1,65 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "frontend/feature_extractor_interface.h"
|
||||
|
||||
namespace ppspeech {
|
||||
|
||||
|
||||
struct DecibelNormalizerOptions {
|
||||
float target_db;
|
||||
float max_gain_db;
|
||||
DecibelNormalizerOptions() :
|
||||
target_db(-20),
|
||||
max_gain_db(300.0),
|
||||
convert_int_float(false) {}
|
||||
|
||||
void Register(kaldi::OptionsItf* opts) {
|
||||
opts->Register("target-db", &target_db, "target db for db normalization");
|
||||
opts->Register("max-gain-db", &max_gain_db, "max gain db for db normalization");
|
||||
opts->Register("convert-int-float", &convert_int_float, "if convert int samples to float");
|
||||
}
|
||||
};
|
||||
|
||||
class DecibelNormalizer : public FeatureExtractorInterface {
|
||||
public:
|
||||
explict DecibelNormalizer(const DecibelNormalizerOptions& opts,
|
||||
const std::unique_ptr<FeatureExtractorInterface>& pre_extractor);
|
||||
virtual void AcceptWavefrom(const kaldi::Vector<kaldi::BaseFloat>& input);
|
||||
virtual void Read(kaldi::Vector<kaldi::BaseFloat>* feat);
|
||||
virtual size_t Dim() const;
|
||||
bool Compute(const kaldi::Vector<kaldi::BaseFloat>& input,
|
||||
kaldi::Vector<kaldi::BaseFloat>>* feat);
|
||||
private:
|
||||
};
|
||||
|
||||
struct NormalizerOptions {
|
||||
std::string mean_std_path;
|
||||
NormalizerOptions() :
|
||||
mean_std_path("") {}
|
||||
|
||||
void Register(kaldi::OptionsItf* opts) {
|
||||
opts->Register("mean-std", &mean_std_path, "mean std file");
|
||||
}
|
||||
};
|
||||
|
||||
// todo refactor later (SmileGoat)
|
||||
class PPNormalizer : public FeatureExtractorInterface {
|
||||
public:
|
||||
explicit PPNormalizer(const NormalizerOptions& opts,
|
||||
const std::unique_ptr<FeatureExtractorInterface>& pre_extractor);
|
||||
~PPNormalizer() {}
|
||||
virtual void AcceptWavefrom(const kaldi::Vector<kaldi::BaseFloat>& input);
|
||||
virtual void Read(kaldi::Vector<kaldi::BaseFloat>* feat);
|
||||
virtual size_t Dim() const;
|
||||
bool Compute(const kaldi::Vector<kaldi::BaseFloat>& input,
|
||||
kaldi::Vector<kaldi::BaseFloat>>& feat);
|
||||
|
||||
private:
|
||||
bool _initialized;
|
||||
kaldi::Vector<float> mean_;
|
||||
kaldi::Vector<float> variance_;
|
||||
NormalizerOptions _opts;
|
||||
};
|
||||
|
||||
} // namespace ppspeech
|
Loading…
Reference in new issue