rename arg of Accept & Read

pull/1542/head
SmileGoat 4 years ago
parent 22fe1c9dbe
commit 027feae9f2

@ -40,7 +40,7 @@ void FeatureCache::Accept(
} }
// pop feature chunk // pop feature chunk
bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* output_feats) { bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
kaldi::Timer timer; kaldi::Timer timer;
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
while (cache_.empty() && base_extractor_->IsFinished() == false) { while (cache_.empty() && base_extractor_->IsFinished() == false) {
@ -53,8 +53,8 @@ bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* output_feats) {
usleep(1000); // sleep 1 ms usleep(1000); // sleep 1 ms
} }
if (cache_.empty()) return false; if (cache_.empty()) return false;
output_feats->Resize(cache_.front().Dim()); feats->Resize(cache_.front().Dim());
output_feats->CopyFromVec(cache_.front()); feats->CopyFromVec(cache_.front());
cache_.pop(); cache_.pop();
ready_feed_condition_.notify_one(); ready_feed_condition_.notify_one();
return true; return true;

@ -26,8 +26,8 @@ class FeatureCache : public FeatureExtractorInterface {
std::unique_ptr<FeatureExtractorInterface> base_extractor = NULL); std::unique_ptr<FeatureExtractorInterface> base_extractor = NULL);
virtual void Accept( virtual void Accept(
const kaldi::VectorBase<kaldi::BaseFloat>& inputs); const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
// output_feats dim = num_frames * feature_dim // feats dim = num_frames * feature_dim
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* output_feats); virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
// the feature cache only caches features that come from the base extractor // the feature cache only caches features that come from the base extractor
virtual size_t Dim() const { return base_extractor_->Dim(); } virtual size_t Dim() const { return base_extractor_->Dim(); }
virtual void SetFinished() { virtual void SetFinished() {

@ -21,7 +21,8 @@ namespace ppspeech {
class FeatureExtractorInterface { class FeatureExtractorInterface {
public: public:
// accept input data // accept input data: features or raw waves, as decided
// by the base_extractor
virtual void Accept( virtual void Accept(
const kaldi::VectorBase<kaldi::BaseFloat>& inputs) = 0; const kaldi::VectorBase<kaldi::BaseFloat>& inputs) = 0;
// get the processed result // get the processed result

@ -70,7 +70,7 @@ void LinearSpectrogram::Accept(const VectorBase<BaseFloat>& inputs) {
base_extractor_->Accept(inputs); base_extractor_->Accept(inputs);
} }
bool LinearSpectrogram::Read(Vector<BaseFloat>* output_feats) { bool LinearSpectrogram::Read(Vector<BaseFloat>* feats) {
Vector<BaseFloat> input_feats(chunk_sample_size_); Vector<BaseFloat> input_feats(chunk_sample_size_);
bool flag = base_extractor_->Read(&input_feats); bool flag = base_extractor_->Read(&input_feats);
if (flag == false || input_feats.Dim() == 0) return false; if (flag == false || input_feats.Dim() == 0) return false;
@ -83,10 +83,10 @@ bool LinearSpectrogram::Read(Vector<BaseFloat>* output_feats) {
if (result.size() != 0) { if (result.size() != 0) {
feat_size = result.size() * result[0].size(); feat_size = result.size() * result[0].size();
} }
output_feats->Resize(feat_size); feats->Resize(feat_size);
// todo refactor (SmileGoat) // todo refactor (SmileGoat)
for (size_t idx = 0; idx < feat_size; ++idx) { for (size_t idx = 0; idx < feat_size; ++idx) {
(*output_feats)(idx) = result[idx / dim_][idx % dim_]; (*feats)(idx) = result[idx / dim_][idx % dim_];
} }
return true; return true;
} }
@ -120,9 +120,9 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
// Compute spectrogram feat // Compute spectrogram feat
// todo: refactor later (SmileGoat) // todo: refactor later (SmileGoat)
bool LinearSpectrogram::Compute(const vector<float>& wave, bool LinearSpectrogram::Compute(const vector<float>& waves,
vector<vector<float>>& feat) { vector<vector<float>>& feats) {
int num_samples = wave.size(); int num_samples = waves.size();
const int& frame_length = opts_.frame_opts.WindowSize(); const int& frame_length = opts_.frame_opts.WindowSize();
const int& sample_rate = opts_.frame_opts.samp_freq; const int& sample_rate = opts_.frame_opts.samp_freq;
const int& frame_shift = opts_.frame_opts.WindowShift(); const int& frame_shift = opts_.frame_opts.WindowShift();
@ -134,34 +134,34 @@ bool LinearSpectrogram::Compute(const vector<float>& wave,
} }
int num_frames = 1 + ((num_samples - frame_length) / frame_shift); int num_frames = 1 + ((num_samples - frame_length) / frame_shift);
feat.resize(num_frames); feats.resize(num_frames);
vector<float> fft_real((fft_points_ / 2 + 1), 0); vector<float> fft_real((fft_points_ / 2 + 1), 0);
vector<float> fft_img((fft_points_ / 2 + 1), 0); vector<float> fft_img((fft_points_ / 2 + 1), 0);
vector<float> v(frame_length, 0); vector<float> v(frame_length, 0);
vector<float> power((fft_points / 2 + 1)); vector<float> power((fft_points / 2 + 1));
for (int i = 0; i < num_frames; ++i) { for (int i = 0; i < num_frames; ++i) {
vector<float> data(wave.data() + i * frame_shift, vector<float> data(waves.data() + i * frame_shift,
wave.data() + i * frame_shift + frame_length); waves.data() + i * frame_shift + frame_length);
Hanning(&data); Hanning(&data);
fft_img.clear(); fft_img.clear();
fft_real.clear(); fft_real.clear();
v.assign(data.begin(), data.end()); v.assign(data.begin(), data.end());
NumpyFft(&v, &fft_real, &fft_img); NumpyFft(&v, &fft_real, &fft_img);
feat[i].resize(fft_points / 2 + 1); // the last dimension is Fs/2 Hz feats[i].resize(fft_points / 2 + 1); // the last dimension is Fs/2 Hz
for (int j = 0; j < (fft_points / 2 + 1); ++j) { for (int j = 0; j < (fft_points / 2 + 1); ++j) {
power[j] = fft_real[j] * fft_real[j] + fft_img[j] * fft_img[j]; power[j] = fft_real[j] * fft_real[j] + fft_img[j] * fft_img[j];
feat[i][j] = power[j]; feats[i][j] = power[j];
if (j == 0 || j == feat[0].size() - 1) { if (j == 0 || j == feats[0].size() - 1) {
feat[i][j] /= scale; feats[i][j] /= scale;
} else { } else {
feat[i][j] *= (2.0 / scale); feats[i][j] *= (2.0 / scale);
} }
// log added eps=1e-14 // log added eps=1e-14
feat[i][j] = std::log(feat[i][j] + 1e-14); feats[i][j] = std::log(feats[i][j] + 1e-14);
} }
} }
return true; return true;

@ -40,7 +40,7 @@ class LinearSpectrogram : public FeatureExtractorInterface {
std::unique_ptr<FeatureExtractorInterface> base_extractor); std::unique_ptr<FeatureExtractorInterface> base_extractor);
virtual void Accept( virtual void Accept(
const kaldi::VectorBase<kaldi::BaseFloat>& inputs); const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* output_feats); virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
// the dim_ is the dim of single frame feature // the dim_ is the dim of single frame feature
virtual size_t Dim() const { return dim_; } virtual size_t Dim() const { return dim_; }
virtual void SetFinished() { base_extractor_->SetFinished(); } virtual void SetFinished() { base_extractor_->SetFinished(); }
@ -48,8 +48,8 @@ class LinearSpectrogram : public FeatureExtractorInterface {
private: private:
void Hanning(std::vector<kaldi::BaseFloat>* data) const; void Hanning(std::vector<kaldi::BaseFloat>* data) const;
bool Compute(const std::vector<kaldi::BaseFloat>& wave, bool Compute(const std::vector<kaldi::BaseFloat>& waves,
std::vector<std::vector<kaldi::BaseFloat>>& feat); std::vector<std::vector<kaldi::BaseFloat>>& feats);
bool NumpyFft(std::vector<kaldi::BaseFloat>* v, bool NumpyFft(std::vector<kaldi::BaseFloat>* v,
std::vector<kaldi::BaseFloat>* real, std::vector<kaldi::BaseFloat>* real,
std::vector<kaldi::BaseFloat>* img) const; std::vector<kaldi::BaseFloat>* img) const;

@ -35,16 +35,16 @@ DecibelNormalizer::DecibelNormalizer(
} }
void DecibelNormalizer::Accept( void DecibelNormalizer::Accept(
const kaldi::VectorBase<BaseFloat>& inputs_wave) { const kaldi::VectorBase<BaseFloat>& waves) {
base_extractor_->Accept(inputs_wave); base_extractor_->Accept(waves);
} }
bool DecibelNormalizer::Read(kaldi::Vector<BaseFloat>* outputs_wave) { bool DecibelNormalizer::Read(kaldi::Vector<BaseFloat>* waves) {
if (base_extractor_->Read(outputs_wave) == false || if (base_extractor_->Read(waves) == false ||
outputs_wave->Dim() == 0) { waves->Dim() == 0) {
return false; return false;
} }
Compute(outputs_wave); Compute(waves);
return true; return true;
} }
@ -67,7 +67,7 @@ void CopyStdVector2Vector(const vector<BaseFloat>& input,
} }
} }
bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feats) const { bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
// calculate db rms // calculate db rms
BaseFloat rms_db = 0.0; BaseFloat rms_db = 0.0;
BaseFloat mean_square = 0.0; BaseFloat mean_square = 0.0;
@ -75,9 +75,9 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feats) const {
BaseFloat wave_float_normlization = 1.0f / (std::pow(2, 16 - 1)); BaseFloat wave_float_normlization = 1.0f / (std::pow(2, 16 - 1));
vector<BaseFloat> samples; vector<BaseFloat> samples;
samples.resize(feats->Dim()); samples.resize(waves->Dim());
for (size_t i = 0; i < samples.size(); ++i) { for (size_t i = 0; i < samples.size(); ++i) {
samples[i] = (*feats)(i); samples[i] = (*waves)(i);
} }
// square // square
@ -107,7 +107,7 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feats) const {
item *= std::pow(10.0, gain / 20.0); item *= std::pow(10.0, gain / 20.0);
} }
CopyStdVector2Vector(samples, feats); CopyStdVector2Vector(samples, waves);
return true; return true;
} }
@ -121,16 +121,16 @@ CMVN::CMVN(std::string cmvn_file,
dim_ = stats_.NumCols() - 1; dim_ = stats_.NumCols() - 1;
} }
void CMVN::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& feats) { void CMVN::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
base_extractor_->Accept(feats); base_extractor_->Accept(inputs);
return; return;
} }
bool CMVN::Read(kaldi::Vector<BaseFloat>* outputs) { bool CMVN::Read(kaldi::Vector<BaseFloat>* feats) {
if (base_extractor_->Read(outputs) == false) { if (base_extractor_->Read(feats) == false) {
return false; return false;
} }
Compute(outputs); Compute(feats);
return true; return true;
} }

@ -46,15 +46,15 @@ class DecibelNormalizer : public FeatureExtractorInterface {
const DecibelNormalizerOptions& opts, const DecibelNormalizerOptions& opts,
std::unique_ptr<FeatureExtractorInterface> base_extractor); std::unique_ptr<FeatureExtractorInterface> base_extractor);
virtual void Accept( virtual void Accept(
const kaldi::VectorBase<kaldi::BaseFloat>& inputs_wave); const kaldi::VectorBase<kaldi::BaseFloat>& waves);
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* outputs_wave); virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
// normalize audio, the dim is 1. // normalize audio, the dim is 1.
virtual size_t Dim() const { return dim_; } virtual size_t Dim() const { return dim_; }
virtual void SetFinished() { base_extractor_->SetFinished(); } virtual void SetFinished() { base_extractor_->SetFinished(); }
virtual bool IsFinished() const { return base_extractor_->IsFinished(); } virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
private: private:
bool Compute(kaldi::VectorBase<kaldi::BaseFloat>* feats) const; bool Compute(kaldi::VectorBase<kaldi::BaseFloat>* waves) const;
DecibelNormalizerOptions opts_; DecibelNormalizerOptions opts_;
size_t dim_; size_t dim_;
std::unique_ptr<FeatureExtractorInterface> base_extractor_; std::unique_ptr<FeatureExtractorInterface> base_extractor_;
@ -67,11 +67,11 @@ class CMVN : public FeatureExtractorInterface {
explicit CMVN(std::string cmvn_file, explicit CMVN(std::string cmvn_file,
std::unique_ptr<FeatureExtractorInterface> base_extractor); std::unique_ptr<FeatureExtractorInterface> base_extractor);
virtual void Accept( virtual void Accept(
const kaldi::VectorBase<kaldi::BaseFloat>& feats); const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
// the length of outputs = feature_row * feature_dim, // the length of feats = feature_row * feature_dim,
// the Matrix is squashed into Vector // the Matrix is squashed into Vector
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* outputs); virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
// the dim_ is the feature dim. // the dim_ is the feature dim.
virtual size_t Dim() const { return dim_; } virtual size_t Dim() const { return dim_; }
virtual void SetFinished() { base_extractor_->SetFinished(); } virtual void SetFinished() { base_extractor_->SetFinished(); }

@ -26,20 +26,20 @@ RawAudioCache::RawAudioCache(int buffer_size)
ring_buffer_.resize(buffer_size); ring_buffer_.resize(buffer_size);
} }
void RawAudioCache::Accept(const VectorBase<BaseFloat>& input_audio) { void RawAudioCache::Accept(const VectorBase<BaseFloat>& waves) {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
while (data_length_ + input_audio.Dim() > ring_buffer_.size()) { while (data_length_ + waves.Dim() > ring_buffer_.size()) {
ready_feed_condition_.wait(lock); ready_feed_condition_.wait(lock);
} }
for (size_t idx = 0; idx < input_audio.Dim(); ++idx) { for (size_t idx = 0; idx < waves.Dim(); ++idx) {
int32 buffer_idx = (idx + start_) % ring_buffer_.size(); int32 buffer_idx = (idx + start_) % ring_buffer_.size();
ring_buffer_[buffer_idx] = input_audio(idx); ring_buffer_[buffer_idx] = waves(idx);
} }
data_length_ += input_audio.Dim(); data_length_ += waves.Dim();
} }
bool RawAudioCache::Read(Vector<BaseFloat>* output_audio) { bool RawAudioCache::Read(Vector<BaseFloat>* waves) {
size_t chunk_size = output_audio->Dim(); size_t chunk_size = waves->Dim();
kaldi::Timer timer; kaldi::Timer timer;
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
while (chunk_size > data_length_) { while (chunk_size > data_length_) {
@ -61,12 +61,12 @@ bool RawAudioCache::Read(Vector<BaseFloat>* output_audio) {
// read last chunk data // read last chunk data
if (chunk_size > data_length_) { if (chunk_size > data_length_) {
chunk_size = data_length_; chunk_size = data_length_;
output_audio->Resize(chunk_size); waves->Resize(chunk_size);
} }
for (size_t idx = 0; idx < chunk_size; ++idx) { for (size_t idx = 0; idx < chunk_size; ++idx) {
int buff_idx = (start_ + idx) % ring_buffer_.size(); int buff_idx = (start_ + idx) % ring_buffer_.size();
output_audio->Data()[idx] = ring_buffer_[buff_idx]; waves->Data()[idx] = ring_buffer_[buff_idx];
} }
data_length_ -= chunk_size; data_length_ -= chunk_size;
start_ = (start_ + chunk_size) % ring_buffer_.size(); start_ = (start_ + chunk_size) % ring_buffer_.size();

@ -23,8 +23,8 @@ namespace ppspeech {
class RawAudioCache : public FeatureExtractorInterface { class RawAudioCache : public FeatureExtractorInterface {
public: public:
explicit RawAudioCache(int buffer_size = kint16max); explicit RawAudioCache(int buffer_size = kint16max);
virtual void Accept(const kaldi::VectorBase<BaseFloat>& input_audio); virtual void Accept(const kaldi::VectorBase<BaseFloat>& waves);
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* output_audio); virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
// the audio dim is 1 // the audio dim is 1
virtual size_t Dim() const { return 1; } virtual size_t Dim() const { return 1; }
virtual void SetFinished() { virtual void SetFinished() {
@ -45,19 +45,20 @@ class RawAudioCache : public FeatureExtractorInterface {
DISALLOW_COPY_AND_ASSIGN(RawAudioCache); DISALLOW_COPY_AND_ASSIGN(RawAudioCache);
}; };
// it is a datasource for testing different frontend module. // it is a data source to test different frontend modules.
// it Accepts waves or feats.
class RawDataCache: public FeatureExtractorInterface { class RawDataCache: public FeatureExtractorInterface {
public: public:
explicit RawDataCache() { finished_ = false; } explicit RawDataCache() { finished_ = false; }
virtual void Accept( virtual void Accept(
const kaldi::VectorBase<kaldi::BaseFloat>& input) { const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
data_ = input; data_ = inputs;
} }
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feat) { virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
if (data_.Dim() == 0) { if (data_.Dim() == 0) {
return false; return false;
} }
(*feat) = data_; (*feats) = data_;
data_.Resize(0); data_.Resize(0);
return true; return true;
} }

Loading…
Cancel
Save