From aaf39863e03a52ec4c1cfc9e580c4c73d277f3bc Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Tue, 25 Oct 2022 06:35:20 +0000 Subject: [PATCH] more info --- .../decoder/ctc_prefix_beam_search_decoder.cc | 2 +- .../speechx/recognizer/u2_recognizer_main.cc | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc b/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc index 0a0afcd7..03a7c133 100644 --- a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc +++ b/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc @@ -74,7 +74,7 @@ void CTCPrefixBeamSearch::AdvanceDecode( std::vector frame_prob; bool flag = decodable->FrameLikelihood(num_frame_decoded_, &frame_prob); if (flag == false) { - LOG(INFO) << "decoder advance decode exit." << frame_prob.size(); + VLOG(1) << "decoder advance decode exit." << frame_prob.size(); break; } diff --git a/speechx/speechx/recognizer/u2_recognizer_main.cc b/speechx/speechx/recognizer/u2_recognizer_main.cc index c02e1c23..61330259 100644 --- a/speechx/speechx/recognizer/u2_recognizer_main.cc +++ b/speechx/speechx/recognizer/u2_recognizer_main.cc @@ -49,11 +49,13 @@ int main(int argc, char* argv[]) { kaldi::Timer timer; for (; !wav_reader.Done(); wav_reader.Next()) { + kaldi::Timer local_timer; std::string utt = wav_reader.Key(); const kaldi::WaveData& wave_data = wav_reader.Value(); LOG(INFO) << "utt: " << utt; LOG(INFO) << "wav dur: " << wave_data.Duration() << " sec."; - tot_wav_duration += wave_data.Duration(); + double dur = wave_data.Duration(); + tot_wav_duration += dur; int32 this_channel = 0; kaldi::SubVector waveform(wave_data.Data(), @@ -63,6 +65,7 @@ int main(int argc, char* argv[]) { int sample_offset = 0; int cnt = 0; + while (sample_offset < tot_samples) { int cur_chunk_size = std::min(chunk_sample_size, tot_samples - sample_offset); @@ -78,8 +81,10 @@ int main(int argc, char* argv[]) { recognizer.SetFinished(); } 
recognizer.Decode(); - LOG(INFO) << "Pratial result: " << cnt << " " - << recognizer.GetPartialResult(); + if (recognizer.DecodedSomething()) { + LOG(INFO) << "Pratial result: " << cnt << " " + << recognizer.GetPartialResult(); + } // no overlap sample_offset += cur_chunk_size; @@ -101,7 +106,9 @@ int main(int argc, char* argv[]) { continue; } - LOG(INFO) << " the result of " << utt << " is " << result; + LOG(INFO) << utt << " " << result; + LOG(INFO) << " RTF: " << local_timer.Elapsed() / dur << " dur: " << dur + << " cost: " << local_timer.Elapsed(); result_writer.Write(utt, result); @@ -111,7 +118,7 @@ int main(int argc, char* argv[]) { double elapsed = timer.Elapsed(); LOG(INFO) << "Done " << num_done << " out of " << (num_err + num_done); - LOG(INFO) << "cost:" << elapsed << " sec"; + LOG(INFO) << "total cost:" << elapsed << " sec"; LOG(INFO) << "total wav duration is: " << tot_wav_duration << " sec"; - LOG(INFO) << "the RTF is: " << elapsed / tot_wav_duration; + LOG(INFO) << "RTF is: " << elapsed / tot_wav_duration; }