You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
PaddleSpeech/runtime/engine/common/frontend/audio_cache.cc

100 lines
3.1 KiB

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "frontend/audio_cache.h"
#include "kaldi/base/timer.h"
namespace ppspeech {
using kaldi::BaseFloat;
using std::vector;
// Constructs an empty audio cache.
//
// buffer_size: number of samples the ring buffer is sized to hold.
// to_float32:  when true, Accept() passes every incoming sample through
//              Convert2PCM32() before storing it.
//
// NOTE(review): nsamples_ is accumulated in Read() but is not set here;
// confirm that the class declaration gives it an initial value.
AudioCache::AudioCache(int buffer_size, bool to_float32)
    : finished_(false),        // no end-of-audio signalled yet
      capacity_(buffer_size),  // unit: sample
      size_(0),                // nothing buffered
      offset_(0),              // next read starts at slot 0
      timeout_(1),             // unit: ms, polling budget used by Read()
      to_float32_(to_float32) {
    // Allocate the storage once, up front.
    ring_buffer_.resize(capacity_);
}
// Rescales one sample from the int16 PCM range to floating point by
// multiplying it with 1 / 2^15 (so values within the int16 range land in
// [-1, 1)).
//
// val: sample value to rescale.
// Returns the rescaled sample.
BaseFloat AudioCache::Convert2PCM32(BaseFloat val) {
    // 2^15 == 32768. Written as a compile-time constant so that no
    // std::pow call is made for every converted sample; the product is still
    // evaluated in double precision before being narrowed to BaseFloat.
    constexpr double kInt16Scale = 1.0 / 32768.0;
    return val * kInt16Scale;
}
void AudioCache::Accept(const vector<BaseFloat>& waves) {
kaldi::Timer timer;
std::unique_lock<std::mutex> lock(mutex_);
while (size_ + waves.size() > ring_buffer_.size()) {
ready_feed_condition_.wait(lock);
}
for (size_t idx = 0; idx < waves.size(); ++idx) {
int32 buffer_idx = (idx + offset_ + size_) % ring_buffer_.size();
ring_buffer_[buffer_idx] = waves[idx];
if (to_float32_) ring_buffer_[buffer_idx] = Convert2PCM32(waves[idx]);
}
size_ += waves.size();
VLOG(1) << "AudioCache::Accept cost: " << timer.Elapsed() << " sec. "
<< waves.size() << " samples.";
}
// Pops up to waves->size() samples from the ring buffer into `waves`.
//
// If fewer samples than requested are buffered, the call polls for up to
// timeout_ milliseconds. After the timeout:
//   - if finished_ is set, the remaining samples (possibly none) are
//     returned and `waves` is shrunk to that count;
//   - otherwise nothing is consumed and false is returned.
//
// waves: in: its size is the number of samples requested;
//        out: the samples read (resized only for the final short chunk).
// Returns true when `waves` was filled (or the last chunk was delivered),
// false when the request timed out without enough data.
bool AudioCache::Read(vector<BaseFloat>* waves) {
    kaldi::Timer timer;
    size_t chunk_size = waves->size();
    std::unique_lock<std::mutex> lock(mutex_);
    while (chunk_size > size_) {
        // A condition-variable wait would block forever when the cache is
        // empty and no more data is fed, so poll with a timeout instead.
        const int32 elapsed = static_cast<int32>(timer.Elapsed() * 1000);
        if (elapsed > timeout_) {
            if (finished_) {
                // read last chunk data
                break;
            }
            return false;
        }
        // Release the mutex while sleeping: Accept() locks the same mutex,
        // so sleeping with it held would keep any new samples from arriving
        // during the wait.
        lock.unlock();
        usleep(100);  // sleep 0.1 ms
        lock.lock();
    }

    // read last chunk data
    if (chunk_size > size_) {
        chunk_size = size_;
        waves->resize(chunk_size);
    }

    if (chunk_size > 0) {
        // chunk_size > 0 implies buffered samples exist, hence capacity > 0
        // and the modulo below is well defined.
        const size_t capacity = ring_buffer_.size();
        for (size_t idx = 0; idx < chunk_size; ++idx) {
            (*waves)[idx] = ring_buffer_[(offset_ + idx) % capacity];
        }
        size_ -= chunk_size;
        offset_ = (offset_ + chunk_size) % capacity;
        nsamples_ += chunk_size;
    }
    VLOG(3) << "nsamples readed: " << nsamples_;

    // Space was (possibly) freed: wake a producer blocked in Accept().
    ready_feed_condition_.notify_one();
    VLOG(1) << "AudioCache::Read cost: " << timer.Elapsed() << " sec. "
            << chunk_size << " samples.";
    return true;
}
} // namespace ppspeech