diff --git a/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder.cc b/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder.cc index f54f21fa2..bf912af2e 100644 --- a/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder.cc +++ b/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder.cc @@ -87,9 +87,9 @@ void CTCPrefixBeamSearch::AdvanceDecode( VLOG(1) << "num_frame_decoded_: " << num_frame_decoded_; } - VLOG(1) << "AdvanceDecode feat + forward cost: " << feat_nnet_cost + VLOG(2) << "AdvanceDecode feat + forward cost: " << feat_nnet_cost << " sec."; - VLOG(1) << "AdvanceDecode search cost: " << search_cost << " sec."; + VLOG(2) << "AdvanceDecode search cost: " << search_cost << " sec."; } static bool PrefixScoreCompare( diff --git a/runtime/engine/asr/nnet/nnet_producer.cc b/runtime/engine/asr/nnet/nnet_producer.cc index 431b70251..529fae656 100644 --- a/runtime/engine/asr/nnet/nnet_producer.cc +++ b/runtime/engine/asr/nnet/nnet_producer.cc @@ -46,7 +46,6 @@ void NnetProducer::Acceptlikelihood( bool NnetProducer::Read(std::vector* nnet_prob) { bool flag = cache_.pop(nnet_prob); - VLOG(1) << "nnet cache_ size: " << cache_.size(); return flag; } diff --git a/runtime/engine/asr/nnet/u2_nnet.cc b/runtime/engine/asr/nnet/u2_nnet.cc index 968b6ceea..9a09514e3 100644 --- a/runtime/engine/asr/nnet/u2_nnet.cc +++ b/runtime/engine/asr/nnet/u2_nnet.cc @@ -124,7 +124,15 @@ U2Nnet::U2Nnet(const U2Nnet& other) { offset_ = other.offset_; // copy model ptr - model_ = other.model_->Clone(); + // model_ = other.model_->Clone(); + // hack, fix later + #ifdef WITH_GPU + dev_ = phi::GPUPlace(); + #else + dev_ = phi::CPUPlace(); + #endif + paddle::jit::Layer model = paddle::jit::Load(other.opts_.model_path, dev_); + model_ = std::make_shared(std::move(model)); ctc_activation_ = model_->Function("ctc_activation"); subsampling_rate_ = model_->Attribute("subsampling_rate"); right_context_ = model_->Attribute("right_context"); @@ -166,6 +174,7 @@ void U2Nnet::Reset() { std::move(paddle::zeros({0, 0, 0, 0}, paddle::DataType::FLOAT32)); encoder_outs_.clear(); + VLOG(1) << "FeedForward cost: " << cost_time_ << " sec. "; VLOG(3) << "u2nnet reset"; } @@ -185,8 +194,10 @@ void U2Nnet::FeedForward(const std::vector& features, std::vector ctc_probs; ForwardEncoderChunkImpl( features, feature_dim, &out->logprobs, &out->vocab_dim); - VLOG(1) << "FeedForward cost: " << timer.Elapsed() << " sec. " + float forward_chunk_time = timer.Elapsed(); + VLOG(1) << "FeedForward cost: " << forward_chunk_time << " sec. " << features.size() / feature_dim << " frames."; + cost_time_ += forward_chunk_time; } diff --git a/runtime/engine/asr/nnet/u2_nnet.h b/runtime/engine/asr/nnet/u2_nnet.h index 35a157078..dba5c55e0 100644 --- a/runtime/engine/asr/nnet/u2_nnet.h +++ b/runtime/engine/asr/nnet/u2_nnet.h @@ -113,8 +113,8 @@ class U2Nnet : public U2NnetBase { void EncoderOuts( std::vector>* encoder_out) const; + ModelOptions opts_; // hack, fix later private: - ModelOptions opts_; phi::Place dev_; std::shared_ptr model_{nullptr}; @@ -127,6 +127,7 @@ class U2Nnet : public U2NnetBase { paddle::jit::Function forward_encoder_chunk_; paddle::jit::Function forward_attention_decoder_; paddle::jit::Function ctc_activation_; + float cost_time_ = 0.0; }; } // namespace ppspeech \ No newline at end of file diff --git a/runtime/engine/asr/recognizer/recognizer_main.cc b/runtime/engine/asr/recognizer/recognizer_main.cc index 141a44f33..99b7b4dd8 100644 --- a/runtime/engine/asr/recognizer/recognizer_main.cc +++ b/runtime/engine/asr/recognizer/recognizer_main.cc @@ -88,7 +88,8 @@ int main(int argc, char* argv[]) { kaldi::Timer timer; recognizer_ptr->AttentionRescoring(); - tot_attention_rescore_time += timer.Elapsed(); + float rescore_time = timer.Elapsed(); + tot_attention_rescore_time += rescore_time; std::string result = recognizer_ptr->GetFinalResult(); if (result.empty()) { @@ -101,7 +102,7 @@ int main(int argc, char* argv[]) { tot_decode_time += local_timer.Elapsed(); LOG(INFO) << utt << " " << result; LOG(INFO) << " RTF: " << local_timer.Elapsed() / dur << " dur: " << dur - << " cost: " << local_timer.Elapsed(); + << " cost: " << local_timer.Elapsed() << " rescore:" << rescore_time; result_writer.Write(utt, result); diff --git a/runtime/examples/u2pp_ol/wenetspeech/RESULTS.md b/runtime/examples/u2pp_ol/wenetspeech/RESULTS.md index 3a3544641..263bca519 100644 --- a/runtime/examples/u2pp_ol/wenetspeech/RESULTS.md +++ b/runtime/examples/u2pp_ol/wenetspeech/RESULTS.md @@ -67,7 +67,7 @@ Other -> 100.00 % N=3 C=0 S=3 D=0 I=0 #### RTF ``` I0417 08:07:15.300631 75784 recognizer_main.cc:113] total wav duration is: 36108.9 sec -I0417 08:07:15.300642 75784 recognizer_main.cc:114] total decode cost:16353.7 sec -I0417 08:07:15.300648 75784 recognizer_main.cc:115] total rescore cost:936.858 sec -I0417 08:07:15.300653 75784 recognizer_main.cc:116] RTF is: 0.4529 -``` \ No newline at end of file +I0417 08:07:15.300642 75784 recognizer_main.cc:114] total decode cost:10247.7 sec +I0417 08:07:15.300648 75784 recognizer_main.cc:115] total rescore cost:908.228 sec +I0417 08:07:15.300653 75784 recognizer_main.cc:116] RTF is: 0.283 +``` diff --git a/runtime/examples/u2pp_ol/wenetspeech/local/recognizer_fastdeploy.sh b/runtime/examples/u2pp_ol/wenetspeech/local/recognizer_fastdeploy.sh new file mode 100755 index 000000000..1d60eaff4 --- /dev/null +++ b/runtime/examples/u2pp_ol/wenetspeech/local/recognizer_fastdeploy.sh @@ -0,0 +1,36 @@ +#!/bin/bash +set -e + +data=data +exp=exp +nj=20 + +. utils/parse_options.sh + +mkdir -p $exp +ckpt_dir=./data/model +model_dir=$ckpt_dir/onnx_model/ +aishell_wav_scp=aishell_test.scp +text=$data/test/text + +./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj + +utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recognizer.fd.log \ +recognizer_main \ + --use_fbank=true \ + --num_bins=80 \ + --model_path=$model_dir \ + --word_symbol_table=$model_dir/unit.txt \ + --nnet_decoder_chunk=16 \ + --receptive_field_length=7 \ + --subsampling_rate=4 \ + --with_onnx_model=true \ + --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \ + --result_wspecifier=ark,t:$data/split${nj}/JOB/recognizer.fd.rsl.ark + + +cat $data/split${nj}/*/recognizer.fd.rsl.ark > $exp/aishell.recognizer.fd.rsl +utils/compute-wer.py --char=1 --v=1 $text $exp/aishell.recognizer.fd.rsl > $exp/aishell.recognizer.fd.err +echo "recognizer fd test have finished!!!" +echo "please checkout in $exp/aishell.recognizer.fd.err" +tail -n 7 $exp/aishell.recognizer.fd.err diff --git a/runtime/examples/u2pp_ol/wenetspeech/local/recognizer_wfst_fastdeploy.sh b/runtime/examples/u2pp_ol/wenetspeech/local/recognizer_wfst_fastdeploy.sh new file mode 100755 index 000000000..fb0a19e88 --- /dev/null +++ b/runtime/examples/u2pp_ol/wenetspeech/local/recognizer_wfst_fastdeploy.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -e + +data=data +exp=exp +nj=20 + +. utils/parse_options.sh + +mkdir -p $exp +ckpt_dir=./data/model +model_dir=$ckpt_dir/onnx_model/ +aishell_wav_scp=aishell_test.scp +text=$data/test/text + +./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj + +lang_dir=./data/lang_test/ +graph=$lang_dir/TLG.fst +word_table=$lang_dir/words.txt + +if [ ! -f $graph ]; then + # download ngram, if you want to make graph by yourself, please refer local/run_build_tlg.sh + mkdir -p $lang_dir + pushd $lang_dir + wget -c https://paddlespeech.bj.bcebos.com/speechx/examples/ngram/zh/tlg.zip + unzip tlg.zip + popd +fi + +utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recognizer_wfst_fd.log \ +recognizer_main \ + --use_fbank=true \ + --num_bins=80 \ + --model_path=$model_dir \ + --graph_path=$lang_dir/TLG.fst \ + --word_symbol_table=$word_table \ + --nnet_decoder_chunk=16 \ + --receptive_field_length=7 \ + --subsampling_rate=4 \ + --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \ + --rescoring_weight=0.0 \ + --acoustic_scale=2 \ + --result_wspecifier=ark,t:$data/split${nj}/JOB/result_recognizer_wfst_fd.ark + + +cat $data/split${nj}/*/result_recognizer_wfst_fd.ark > $exp/aishell_recognizer_wfst_fd +utils/compute-wer.py --char=1 --v=1 $text $exp/aishell_recognizer_wfst_fd > $exp/aishell.recognizer_wfst_fd.err +echo "recognizer test have finished!!!" +echo "please checkout in $exp/aishell.recognizer_wfst_fd.err" +tail -n 7 $exp/aishell.recognizer_wfst_fd.err