add profiling timer

pull/2591/head
Hui Zhang 3 years ago
parent 67424bedc5
commit 4638c35cbc

@ -69,20 +69,30 @@ void CTCPrefixBeamSearch::InitDecoder() { Reset(); }
void CTCPrefixBeamSearch::AdvanceDecode(
const std::shared_ptr<kaldi::DecodableInterface>& decodable) {
double search_cost = 0.0;
double feat_nnet_cost = 0.0;
while (1) {
// forward frame by frame
kaldi::Timer timer;
std::vector<kaldi::BaseFloat> frame_prob;
bool flag = decodable->FrameLikelihood(num_frame_decoded_, &frame_prob);
feat_nnet_cost += timer.Elapsed();
if (flag == false) {
VLOG(1) << "decoder advance decode exit." << frame_prob.size();
VLOG(3) << "decoder advance decode exit." << frame_prob.size();
break;
}
timer.Reset();
std::vector<std::vector<kaldi::BaseFloat>> likelihood;
likelihood.push_back(frame_prob);
AdvanceDecoding(likelihood);
search_cost += timer.Elapsed();
VLOG(2) << "num_frame_decoded_: " << num_frame_decoded_;
}
VLOG(1) << "AdvanceDecode feat + forward cost: " << feat_nnet_cost
<< " sec.";
VLOG(1) << "AdvanceDecode search cost: " << search_cost << " sec.";
}
static bool PrefixScoreCompare(

@ -40,7 +40,9 @@ void Assembler::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
// pop feature chunk
bool Assembler::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
kaldi::Timer timer;
bool result = Compute(feats);
VLOG(1) << "Assembler::Read cost: " << timer.Elapsed() << " sec.";
return result;
}
@ -51,14 +53,14 @@ bool Assembler::Compute(Vector<BaseFloat>* feats) {
Vector<BaseFloat> feature;
bool result = base_extractor_->Read(&feature);
if (result == false || feature.Dim() == 0) {
VLOG(1) << "result: " << result
VLOG(3) << "result: " << result
<< " feature dim: " << feature.Dim();
if (IsFinished() == false) {
VLOG(1) << "finished reading feature. cache size: "
VLOG(3) << "finished reading feature. cache size: "
<< feature_cache_.size();
return false;
} else {
VLOG(1) << "break";
VLOG(3) << "break";
break;
}
}
@ -67,11 +69,11 @@ bool Assembler::Compute(Vector<BaseFloat>* feats) {
feature_cache_.push(feature);
nframes_ += 1;
VLOG(1) << "nframes: " << nframes_;
VLOG(3) << "nframes: " << nframes_;
}
if (feature_cache_.size() < receptive_filed_length_) {
VLOG(1) << "feature_cache less than receptive_filed_lenght. "
VLOG(3) << "feature_cache less than receptive_filed_lenght. "
<< feature_cache_.size() << ": " << receptive_filed_length_;
return false;
}
@ -87,7 +89,7 @@ bool Assembler::Compute(Vector<BaseFloat>* feats) {
int32 this_chunk_size =
std::min(static_cast<int32>(feature_cache_.size()), frame_chunk_size_);
feats->Resize(dim_ * this_chunk_size);
VLOG(1) << "read " << this_chunk_size << " feat.";
VLOG(3) << "read " << this_chunk_size << " feat.";
int32 counter = 0;
while (counter < this_chunk_size) {

@ -38,6 +38,7 @@ BaseFloat AudioCache::Convert2PCM32(BaseFloat val) {
}
void AudioCache::Accept(const VectorBase<BaseFloat>& waves) {
kaldi::Timer timer;
std::unique_lock<std::mutex> lock(mutex_);
while (size_ + waves.Dim() > ring_buffer_.size()) {
ready_feed_condition_.wait(lock);
@ -48,11 +49,13 @@ void AudioCache::Accept(const VectorBase<BaseFloat>& waves) {
if (to_float32_) ring_buffer_[buffer_idx] = Convert2PCM32(waves(idx));
}
size_ += waves.Dim();
VLOG(1) << "AudioCache::Accept cost: " << timer.Elapsed() << " sec. "
<< waves.Dim() << " samples.";
}
bool AudioCache::Read(Vector<BaseFloat>* waves) {
size_t chunk_size = waves->Dim();
kaldi::Timer timer;
size_t chunk_size = waves->Dim();
std::unique_lock<std::mutex> lock(mutex_);
while (chunk_size > size_) {
// when audio is empty and no more data feed
@ -86,9 +89,11 @@ bool AudioCache::Read(Vector<BaseFloat>* waves) {
offset_ = (offset_ + chunk_size) % ring_buffer_.size();
nsamples_ += chunk_size;
VLOG(1) << "nsamples readed: " << nsamples_;
VLOG(3) << "nsamples readed: " << nsamples_;
ready_feed_condition_.notify_one();
VLOG(1) << "AudioCache::Read cost: " << timer.Elapsed() << " sec. "
<< chunk_size << " samples.";
return true;
}

@ -50,8 +50,11 @@ bool CMVN::Read(kaldi::Vector<BaseFloat>* feats) {
if (base_extractor_->Read(feats) == false || feats->Dim() == 0) {
return false;
}
// appply cmvn
kaldi::Timer timer;
Compute(feats);
VLOG(1) << "CMVN::Read cost: " << timer.Elapsed() << " sec.";
return true;
}

@ -27,27 +27,32 @@ namespace ppspeech {
// pre-recorded audio/feature
class DataCache : public FrontendInterface {
public:
DataCache() { finished_ = false; }
DataCache() : finished_{false}, dim_{0} {}
// accept waves/feats
virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) override {
data_ = inputs;
SetDim(data_.Dim());
}
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
bool Read(kaldi::Vector<kaldi::BaseFloat>* feats) override {
if (data_.Dim() == 0) {
return false;
}
(*feats) = data_;
data_.Resize(0);
SetDim(data_.Dim());
return true;
}
virtual void SetFinished() { finished_ = true; }
virtual bool IsFinished() const { return finished_; }
virtual size_t Dim() const { return dim_; }
void SetFinished() override { finished_ = true; }
bool IsFinished() const override { return finished_; }
size_t Dim() const override { return dim_; }
void SetDim(int32 dim) { dim_ = dim; }
virtual void Reset() { finished_ = true; }
void Reset() override {
finished_ = true;
dim_ = 0;
}
private:
kaldi::Vector<kaldi::BaseFloat> data_;

@ -34,6 +34,7 @@ FeatureCache::FeatureCache(FeatureCacheOptions opts,
void FeatureCache::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
// read inputs
base_extractor_->Accept(inputs);
// feed current data
bool result = false;
do {
@ -62,6 +63,7 @@ bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
feats->CopyFromVec(cache_.front());
cache_.pop();
ready_feed_condition_.notify_one();
VLOG(1) << "FeatureCache::Read cost: " << timer.Elapsed() << " sec.";
return true;
}
@ -72,9 +74,11 @@ bool FeatureCache::Compute() {
bool result = base_extractor_->Read(&feature);
if (result == false || feature.Dim() == 0) return false;
kaldi::Timer timer;
int32 num_chunk = feature.Dim() / dim_;
nframe_ += num_chunk;
VLOG(1) << "nframe computed: " << nframe_;
VLOG(3) << "nframe computed: " << nframe_;
for (int chunk_idx = 0; chunk_idx < num_chunk; ++chunk_idx) {
int32 start = chunk_idx * dim_;
@ -92,7 +96,10 @@ bool FeatureCache::Compute() {
cache_.push(feature_chunk);
ready_read_condition_.notify_one();
}
return result;
VLOG(1) << "FeatureCache::Compute cost: " << timer.Elapsed() << " sec. "
<< num_chunk << " feats.";
return true;
}
} // namespace ppspeech

@ -58,7 +58,7 @@ class FeatureCache : public FrontendInterface {
std::swap(cache_, empty);
nframe_ = 0;
base_extractor_->Reset();
VLOG(1) << "feature cache reset: cache size: " << cache_.size();
VLOG(3) << "feature cache reset: cache size: " << cache_.size();
}
private:

@ -34,6 +34,7 @@ bool StreamingFeatureTpl<F>::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
bool flag = base_extractor_->Read(&wav);
if (flag == false || wav.Dim() == 0) return false;
kaldi::Timer timer;
// append remaned waves
int32 wav_len = wav.Dim();
int32 left_len = remained_wav_.Dim();
@ -52,6 +53,8 @@ bool StreamingFeatureTpl<F>::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
remained_wav_.Resize(left_samples);
remained_wav_.CopyFromVec(
waves.Range(frame_shift * num_frames, left_samples));
VLOG(1) << "StreamingFeatureTpl<F>::Read cost: " << timer.Elapsed()
<< " sec.";
return true;
}

@ -68,9 +68,10 @@ bool Decodable::AdvanceChunk() {
Vector<BaseFloat> features;
if (frontend_ == NULL || frontend_->Read(&features) == false) {
// no feat or frontend_ not init.
VLOG(1) << "decodable exit;";
VLOG(3) << "decodable exit;";
return false;
}
VLOG(1) << "AdvanceChunk feat cost: " << timer.Elapsed() << " sec.";
VLOG(2) << "Forward in " << features.Dim() / frontend_->Dim() << " feats.";
// forward feats
@ -88,7 +89,8 @@ bool Decodable::AdvanceChunk() {
// update state, decoding frame.
frame_offset_ = frames_ready_;
frames_ready_ += nnet_out_cache_.NumRows();
VLOG(2) << "Forward feat chunk cost: " << timer.Elapsed() << " sec.";
VLOG(1) << "AdvanceChunk feat + forward cost: " << timer.Elapsed()
<< " sec.";
return true;
}
@ -115,7 +117,7 @@ bool Decodable::AdvanceChunk(kaldi::Vector<kaldi::BaseFloat>* logprobs,
// read one frame likelihood
bool Decodable::FrameLikelihood(int32 frame, vector<BaseFloat>* likelihood) {
if (EnsureFrameHaveComputed(frame) == false) {
VLOG(1) << "framelikehood exit.";
VLOG(3) << "framelikehood exit.";
return false;
}
@ -168,7 +170,9 @@ void Decodable::Reset() {
void Decodable::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
float reverse_weight,
std::vector<float>* rescoring_score) {
kaldi::Timer timer;
nnet_->AttentionRescoring(hyps, reverse_weight, rescoring_score);
VLOG(1) << "Attention Rescoring cost: " << timer.Elapsed() << " sec.";
}
} // namespace ppspeech

@ -154,7 +154,7 @@ void U2Nnet::Reset() {
std::move(paddle::zeros({0, 0, 0, 0}, paddle::DataType::FLOAT32));
encoder_outs_.clear();
VLOG(1) << "u2nnet reset";
VLOG(3) << "u2nnet reset";
}
// Debug API
@ -168,6 +168,7 @@ void U2Nnet::FeedEncoderOuts(const paddle::Tensor& encoder_out) {
void U2Nnet::FeedForward(const kaldi::Vector<BaseFloat>& features,
const int32& feature_dim,
NnetOut* out) {
kaldi::Timer timer;
std::vector<kaldi::BaseFloat> chunk_feats(features.Data(),
features.Data() + features.Dim());
@ -179,6 +180,8 @@ void U2Nnet::FeedForward(const kaldi::Vector<BaseFloat>& features,
std::memcpy(out->logprobs.Data(),
ctc_probs.data(),
ctc_probs.size() * sizeof(kaldi::BaseFloat));
VLOG(1) << "FeedForward cost: " << timer.Elapsed() << " sec. "
<< chunk_feats.size() / feature_dim << " frames.";
}
@ -415,7 +418,6 @@ void U2Nnet::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
#ifdef USE_PROFILING
RecordEvent event("AttentionRescoring", TracerEventType::UserDefined, 1);
#endif
CHECK(rescoring_score != nullptr);
int num_hyps = hyps.size();
@ -627,7 +629,7 @@ void U2Nnet::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
// combinded left-to-right and right-to-lfet score
(*rescoring_score)[i] =
score * (1 - reverse_weight) + r_score * reverse_weight;
VLOG(1) << "hyp " << i << " " << hyp.size() << " score: " << score
VLOG(3) << "hyp " << i << " " << hyp.size() << " score: " << score
<< " r_score: " << r_score
<< " reverse_weight: " << reverse_weight
<< " final score: " << (*rescoring_score)[i];
@ -639,7 +641,7 @@ void U2Nnet::EncoderOuts(
std::vector<kaldi::Vector<kaldi::BaseFloat>>* encoder_out) const {
// list of (B=1,T,D)
int size = encoder_outs_.size();
VLOG(1) << "encoder_outs_ size: " << size;
VLOG(3) << "encoder_outs_ size: " << size;
for (int i = 0; i < size; i++) {
const paddle::Tensor& item = encoder_outs_[i];
@ -649,7 +651,7 @@ void U2Nnet::EncoderOuts(
const int& T = shape[1];
const int& D = shape[2];
CHECK(B == 1) << "Only support batch one.";
VLOG(1) << "encoder out " << i << " shape: (" << B << "," << T << ","
VLOG(3) << "encoder out " << i << " shape: (" << B << "," << T << ","
<< D << ")";
const float* this_tensor_ptr = item.data<float>();

@ -67,7 +67,10 @@ void U2Recognizer::ResetContinuousDecoding() {
void U2Recognizer::Accept(const VectorBase<BaseFloat>& waves) {
kaldi::Timer timer;
feature_pipeline_->Accept(waves);
VLOG(1) << "feed waves cost: " << timer.Elapsed() << " sec. " << waves.Dim()
<< " samples.";
}
@ -78,9 +81,7 @@ void U2Recognizer::Decode() {
void U2Recognizer::Rescoring() {
// Do attention Rescoring
kaldi::Timer timer;
AttentionRescoring();
VLOG(1) << "Rescoring cost latency: " << timer.Elapsed() << " sec.";
}
void U2Recognizer::UpdateResult(bool finish) {
@ -181,15 +182,13 @@ void U2Recognizer::AttentionRescoring() {
return;
}
kaldi::Timer timer;
std::vector<float> rescoring_score;
decodable_->AttentionRescoring(
hypotheses, opts_.decoder_opts.reverse_weight, &rescoring_score);
VLOG(1) << "Attention Rescoring takes " << timer.Elapsed() << " sec.";
// combine ctc score and rescoring score
for (size_t i = 0; i < num_hyps; i++) {
VLOG(1) << "hyp " << i << " rescoring_score: " << rescoring_score[i]
VLOG(3) << "hyp " << i << " rescoring_score: " << rescoring_score[i]
<< " ctc_score: " << result_[i].score
<< " rescoring_weight: " << opts_.decoder_opts.rescoring_weight
<< " ctc_weight: " << opts_.decoder_opts.ctc_weight;
@ -197,12 +196,12 @@ void U2Recognizer::AttentionRescoring() {
opts_.decoder_opts.rescoring_weight * rescoring_score[i] +
opts_.decoder_opts.ctc_weight * result_[i].score;
VLOG(1) << "hyp: " << result_[0].sentence
VLOG(3) << "hyp: " << result_[0].sentence
<< " score: " << result_[0].score;
}
std::sort(result_.begin(), result_.end(), DecodeResult::CompareFunc);
VLOG(1) << "result: " << result_[0].sentence
VLOG(3) << "result: " << result_[0].sentence
<< " score: " << result_[0].score;
}

@ -31,6 +31,7 @@ int main(int argc, char* argv[]) {
int32 num_done = 0, num_err = 0;
double tot_wav_duration = 0.0;
double tot_decode_time = 0.0;
kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
FLAGS_wav_rspecifier);
@ -47,9 +48,7 @@ int main(int argc, char* argv[]) {
ppspeech::U2RecognizerResource::InitFromFlags();
ppspeech::U2Recognizer recognizer(resource);
kaldi::Timer timer;
for (; !wav_reader.Done(); wav_reader.Next()) {
kaldi::Timer local_timer;
std::string utt = wav_reader.Key();
const kaldi::WaveData& wave_data = wav_reader.Value();
LOG(INFO) << "utt: " << utt;
@ -65,6 +64,8 @@ int main(int argc, char* argv[]) {
int sample_offset = 0;
int cnt = 0;
kaldi::Timer timer;
kaldi::Timer local_timer;
while (sample_offset < tot_samples) {
int cur_chunk_size =
@ -95,6 +96,8 @@ int main(int argc, char* argv[]) {
// second pass decoding
recognizer.Rescoring();
tot_decode_time += timer.Elapsed();
std::string result = recognizer.GetFinalResult();
recognizer.Reset();
@ -115,10 +118,8 @@ int main(int argc, char* argv[]) {
++num_done;
}
double elapsed = timer.Elapsed();
LOG(INFO) << "Done " << num_done << " out of " << (num_err + num_done);
LOG(INFO) << "total cost:" << elapsed << " sec";
LOG(INFO) << "total wav duration is: " << tot_wav_duration << " sec";
LOG(INFO) << "RTF is: " << elapsed / tot_wav_duration;
LOG(INFO) << "total decode cost:" << tot_decode_time << " sec";
LOG(INFO) << "RTF is: " << tot_decode_time / tot_wav_duration;
}

Loading…
Cancel
Save