Merge pull request #1542 from SmileGoat/stream_feature
[speechx] add raw_audio & feature_cache (pull/1559/head)
commit
bedd2de46c
@ -0,0 +1,84 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "frontend/feature_cache.h"

#include <chrono>
|
||||||
|
|
||||||
|
namespace ppspeech {
|
||||||
|
|
||||||
|
using kaldi::Vector;
|
||||||
|
using kaldi::VectorBase;
|
||||||
|
using kaldi::BaseFloat;
|
||||||
|
using std::vector;
|
||||||
|
using kaldi::SubVector;
|
||||||
|
using std::unique_ptr;
|
||||||
|
|
||||||
|
// Creates a cache in front of `base_extractor` and takes ownership of it.
// `max_size` is stored as the limit Compute() compares the queue size against.
FeatureCache::FeatureCache(
    int max_size, unique_ptr<FeatureExtractorInterface> base_extractor)
    : max_size_(max_size), base_extractor_(std::move(base_extractor)) {}
|
||||||
|
|
||||||
|
void FeatureCache::Accept(
|
||||||
|
const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
|
||||||
|
base_extractor_->Accept(inputs);
|
||||||
|
// feed current data
|
||||||
|
bool result = false;
|
||||||
|
do {
|
||||||
|
result = Compute();
|
||||||
|
} while (result);
|
||||||
|
}
|
||||||
|
|
||||||
|
// pop feature chunk
|
||||||
|
bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
|
||||||
|
kaldi::Timer timer;
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_);
|
||||||
|
while (cache_.empty() && base_extractor_->IsFinished() == false) {
|
||||||
|
ready_read_condition_.wait(lock);
|
||||||
|
BaseFloat elapsed = timer.Elapsed() * 1000;
|
||||||
|
// todo replace 1.0 with timeout_
|
||||||
|
if (elapsed > 1.0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
usleep(1000); // sleep 1 ms
|
||||||
|
}
|
||||||
|
if (cache_.empty()) return false;
|
||||||
|
feats->Resize(cache_.front().Dim());
|
||||||
|
feats->CopyFromVec(cache_.front());
|
||||||
|
cache_.pop();
|
||||||
|
ready_feed_condition_.notify_one();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// read all data from base_feature_extractor_ into cache_
|
||||||
|
bool FeatureCache::Compute() {
|
||||||
|
// compute and feed
|
||||||
|
Vector<BaseFloat> feature_chunk;
|
||||||
|
bool result = base_extractor_->Read(&feature_chunk);
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_);
|
||||||
|
while (cache_.size() >= max_size_) {
|
||||||
|
ready_feed_condition_.wait(lock);
|
||||||
|
}
|
||||||
|
if (feature_chunk.Dim() != 0) {
|
||||||
|
cache_.push(feature_chunk);
|
||||||
|
}
|
||||||
|
ready_read_condition_.notify_one();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Placeholder that currently does nothing.
// NOTE(review): this is defined as a free function in namespace ppspeech, not
// as a FeatureCache member, so mutex_ is not reachable here — confirm whether
// a FeatureCache::Reset() was intended.
void Reset() {
    // std::lock_guard<std::mutex> lock(mutex_);
}
|
||||||
|
|
||||||
|
} // namespace ppspeech
|
@ -0,0 +1,53 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "base/common.h"
|
||||||
|
#include "frontend/feature_extractor_interface.h"
|
||||||
|
|
||||||
|
namespace ppspeech {
|
||||||
|
|
||||||
|
class FeatureCache : public FeatureExtractorInterface {
|
||||||
|
public:
|
||||||
|
explicit FeatureCache(
|
||||||
|
int32 max_size = kint16max,
|
||||||
|
std::unique_ptr<FeatureExtractorInterface> base_extractor = NULL);
|
||||||
|
virtual void Accept(
|
||||||
|
const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
|
||||||
|
// feats dim = num_frames * feature_dim
|
||||||
|
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
|
||||||
|
// feature cache only cache feature which from base extractor
|
||||||
|
virtual size_t Dim() const { return base_extractor_->Dim(); }
|
||||||
|
virtual void SetFinished() {
|
||||||
|
base_extractor_->SetFinished();
|
||||||
|
// read the last chunk data
|
||||||
|
Compute();
|
||||||
|
}
|
||||||
|
virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool Compute();
|
||||||
|
|
||||||
|
bool finished_;
|
||||||
|
std::mutex mutex_;
|
||||||
|
size_t max_size_;
|
||||||
|
std::queue<kaldi::Vector<BaseFloat>> cache_;
|
||||||
|
std::unique_ptr<FeatureExtractorInterface> base_extractor_;
|
||||||
|
std::condition_variable ready_feed_condition_;
|
||||||
|
std::condition_variable ready_read_condition_;
|
||||||
|
//DISALLOW_COPY_AND_ASSGIN(FeatureCache);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace ppspeech
|
@ -0,0 +1,77 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "frontend/raw_audio.h"
|
||||||
|
#include "kaldi/base/timer.h"
|
||||||
|
|
||||||
|
namespace ppspeech {
|
||||||
|
|
||||||
|
using kaldi::BaseFloat;
|
||||||
|
using kaldi::VectorBase;
|
||||||
|
using kaldi::Vector;
|
||||||
|
|
||||||
|
// Creates an empty ring buffer able to hold `buffer_size` samples.
// Members are initialized in their declaration order (ring_buffer_, start_,
// data_length_, finished_, timeout_) so the list matches the order the
// compiler actually uses. timeout_ is the Read() wait limit in milliseconds.
RawAudioCache::RawAudioCache(int buffer_size)
    : ring_buffer_(buffer_size),
      start_(0),
      data_length_(0),
      finished_(false),
      timeout_(1) {}
|
||||||
|
|
||||||
|
void RawAudioCache::Accept(const VectorBase<BaseFloat>& waves) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_);
|
||||||
|
while (data_length_ + waves.Dim() > ring_buffer_.size()) {
|
||||||
|
ready_feed_condition_.wait(lock);
|
||||||
|
}
|
||||||
|
for (size_t idx = 0; idx < waves.Dim(); ++idx) {
|
||||||
|
int32 buffer_idx = (idx + start_) % ring_buffer_.size();
|
||||||
|
ring_buffer_[buffer_idx] = waves(idx);
|
||||||
|
}
|
||||||
|
data_length_ += waves.Dim();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copies buffered samples into `waves`.
//
// The number of samples requested is waves->Dim() on entry. If that many are
// not buffered yet, the call polls for up to timeout_ milliseconds.
//
// @param waves  in: sized to the wanted chunk length; out: the samples read.
//               Shrunk to the remaining sample count (possibly 0) when the
//               input is finished and fewer samples are left.
// @return false if the request could not be satisfied before the timeout and
//         the input is not finished; true otherwise.
bool RawAudioCache::Read(Vector<BaseFloat>* waves) {
    size_t chunk_size = waves->Dim();
    kaldi::Timer timer;
    std::unique_lock<std::mutex> lock(mutex_);
    while (chunk_size > data_length_) {
        // when audio is empty and no more data feed, a plain condition wait
        // would block forever, so poll with a timeout instead.
        int32 elapsed = static_cast<int32>(timer.Elapsed() * 1000);
        if (elapsed > timeout_) {
            if (finished_) {  // read last chunk data
                break;
            }
            return false;
        }
        // Release the mutex while sleeping; otherwise Accept() and
        // SetFinished() cannot run and the state polled here never changes.
        lock.unlock();
        usleep(100);  // sleep 0.1 ms
        lock.lock();
    }

    // read last chunk data
    if (chunk_size > data_length_) {
        chunk_size = data_length_;
        waves->Resize(chunk_size);
    }

    for (size_t idx = 0; idx < chunk_size; ++idx) {
        int buff_idx = (start_ + idx) % ring_buffer_.size();
        waves->Data()[idx] = ring_buffer_[buff_idx];
    }
    data_length_ -= chunk_size;
    start_ = (start_ + chunk_size) % ring_buffer_.size();
    // Space was freed: wake a producer blocked in Accept().
    ready_feed_condition_.notify_one();
    return true;
}
|
||||||
|
|
||||||
|
} // namespace ppspeech
|
@ -0,0 +1,77 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "base/common.h"
|
||||||
|
#include "frontend/feature_extractor_interface.h"
|
||||||
|
|
||||||
|
namespace ppspeech {
|
||||||
|
|
||||||
|
class RawAudioCache : public FeatureExtractorInterface {
|
||||||
|
public:
|
||||||
|
explicit RawAudioCache(int buffer_size = kint16max);
|
||||||
|
virtual void Accept(const kaldi::VectorBase<BaseFloat>& waves);
|
||||||
|
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
|
||||||
|
// the audio dim is 1
|
||||||
|
virtual size_t Dim() const { return 1; }
|
||||||
|
virtual void SetFinished() {
|
||||||
|
std::lock_guard<std::mutex> lock(mutex_);
|
||||||
|
finished_ = true;
|
||||||
|
}
|
||||||
|
virtual bool IsFinished() const { return finished_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<kaldi::BaseFloat> ring_buffer_;
|
||||||
|
size_t start_;
|
||||||
|
size_t data_length_;
|
||||||
|
bool finished_;
|
||||||
|
mutable std::mutex mutex_;
|
||||||
|
std::condition_variable ready_feed_condition_;
|
||||||
|
kaldi::int32 timeout_;
|
||||||
|
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(RawAudioCache);
|
||||||
|
};
|
||||||
|
|
||||||
|
// it is a data source to test different frontend module.
|
||||||
|
// it Accepts waves or feats.
|
||||||
|
class RawDataCache: public FeatureExtractorInterface {
|
||||||
|
public:
|
||||||
|
explicit RawDataCache() { finished_ = false; }
|
||||||
|
virtual void Accept(
|
||||||
|
const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
|
||||||
|
data_ = inputs;
|
||||||
|
}
|
||||||
|
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
|
||||||
|
if (data_.Dim() == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
(*feats) = data_;
|
||||||
|
data_.Resize(0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
//the dim is data_ length
|
||||||
|
virtual size_t Dim() const { return data_.Dim(); }
|
||||||
|
virtual void SetFinished() { finished_ = true; }
|
||||||
|
virtual bool IsFinished() const { return finished_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
kaldi::Vector<kaldi::BaseFloat> data_;
|
||||||
|
bool finished_;
|
||||||
|
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(RawDataCache);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace ppspeech
|
Loading…
Reference in new issue