pull/3173/head
YangZhou 2 years ago
parent 85a1744ecc
commit 707c72c3ea

@ -87,9 +87,9 @@ void CTCPrefixBeamSearch::AdvanceDecode(
VLOG(1) << "num_frame_decoded_: " << num_frame_decoded_; VLOG(1) << "num_frame_decoded_: " << num_frame_decoded_;
} }
VLOG(1) << "AdvanceDecode feat + forward cost: " << feat_nnet_cost VLOG(2) << "AdvanceDecode feat + forward cost: " << feat_nnet_cost
<< " sec."; << " sec.";
VLOG(1) << "AdvanceDecode search cost: " << search_cost << " sec."; VLOG(2) << "AdvanceDecode search cost: " << search_cost << " sec.";
} }
static bool PrefixScoreCompare( static bool PrefixScoreCompare(

@ -46,7 +46,6 @@ void NnetProducer::Acceptlikelihood(
bool NnetProducer::Read(std::vector<kaldi::BaseFloat>* nnet_prob) { bool NnetProducer::Read(std::vector<kaldi::BaseFloat>* nnet_prob) {
bool flag = cache_.pop(nnet_prob); bool flag = cache_.pop(nnet_prob);
VLOG(1) << "nnet cache_ size: " << cache_.size();
return flag; return flag;
} }

@ -124,7 +124,15 @@ U2Nnet::U2Nnet(const U2Nnet& other) {
offset_ = other.offset_; offset_ = other.offset_;
// copy model ptr // copy model ptr
model_ = other.model_->Clone(); // model_ = other.model_->Clone();
// hack, fix later
#ifdef WITH_GPU
dev_ = phi::GPUPlace();
#else
dev_ = phi::CPUPlace();
#endif
paddle::jit::Layer model = paddle::jit::Load(other.opts_.model_path, dev_);
model_ = std::make_shared<paddle::jit::Layer>(std::move(model));
ctc_activation_ = model_->Function("ctc_activation"); ctc_activation_ = model_->Function("ctc_activation");
subsampling_rate_ = model_->Attribute<int>("subsampling_rate"); subsampling_rate_ = model_->Attribute<int>("subsampling_rate");
right_context_ = model_->Attribute<int>("right_context"); right_context_ = model_->Attribute<int>("right_context");
@ -166,6 +174,7 @@ void U2Nnet::Reset() {
std::move(paddle::zeros({0, 0, 0, 0}, paddle::DataType::FLOAT32)); std::move(paddle::zeros({0, 0, 0, 0}, paddle::DataType::FLOAT32));
encoder_outs_.clear(); encoder_outs_.clear();
VLOG(1) << "FeedForward cost: " << cost_time_ << " sec. ";
VLOG(3) << "u2nnet reset"; VLOG(3) << "u2nnet reset";
} }
@ -185,8 +194,10 @@ void U2Nnet::FeedForward(const std::vector<BaseFloat>& features,
std::vector<kaldi::BaseFloat> ctc_probs; std::vector<kaldi::BaseFloat> ctc_probs;
ForwardEncoderChunkImpl( ForwardEncoderChunkImpl(
features, feature_dim, &out->logprobs, &out->vocab_dim); features, feature_dim, &out->logprobs, &out->vocab_dim);
VLOG(1) << "FeedForward cost: " << timer.Elapsed() << " sec. " float forward_chunk_time = timer.Elapsed();
VLOG(1) << "FeedForward cost: " << forward_chunk_time << " sec. "
<< features.size() / feature_dim << " frames."; << features.size() / feature_dim << " frames.";
cost_time_ += forward_chunk_time;
} }

@ -113,8 +113,8 @@ class U2Nnet : public U2NnetBase {
void EncoderOuts( void EncoderOuts(
std::vector<std::vector<kaldi::BaseFloat>>* encoder_out) const; std::vector<std::vector<kaldi::BaseFloat>>* encoder_out) const;
ModelOptions opts_; // hack, fix later
private: private:
ModelOptions opts_;
phi::Place dev_; phi::Place dev_;
std::shared_ptr<paddle::jit::Layer> model_{nullptr}; std::shared_ptr<paddle::jit::Layer> model_{nullptr};
@ -127,6 +127,7 @@ class U2Nnet : public U2NnetBase {
paddle::jit::Function forward_encoder_chunk_; paddle::jit::Function forward_encoder_chunk_;
paddle::jit::Function forward_attention_decoder_; paddle::jit::Function forward_attention_decoder_;
paddle::jit::Function ctc_activation_; paddle::jit::Function ctc_activation_;
float cost_time_ = 0.0;
}; };
} // namespace ppspeech } // namespace ppspeech

@ -88,7 +88,8 @@ int main(int argc, char* argv[]) {
kaldi::Timer timer; kaldi::Timer timer;
recognizer_ptr->AttentionRescoring(); recognizer_ptr->AttentionRescoring();
tot_attention_rescore_time += timer.Elapsed(); float rescore_time = timer.Elapsed();
tot_attention_rescore_time += rescore_time;
std::string result = recognizer_ptr->GetFinalResult(); std::string result = recognizer_ptr->GetFinalResult();
if (result.empty()) { if (result.empty()) {
@ -101,7 +102,7 @@ int main(int argc, char* argv[]) {
tot_decode_time += local_timer.Elapsed(); tot_decode_time += local_timer.Elapsed();
LOG(INFO) << utt << " " << result; LOG(INFO) << utt << " " << result;
LOG(INFO) << " RTF: " << local_timer.Elapsed() / dur << " dur: " << dur LOG(INFO) << " RTF: " << local_timer.Elapsed() / dur << " dur: " << dur
<< " cost: " << local_timer.Elapsed(); << " cost: " << local_timer.Elapsed() << " rescore:" << rescore_time;
result_writer.Write(utt, result); result_writer.Write(utt, result);

@ -67,7 +67,7 @@ Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
#### RTF
```
I0417 08:07:15.300631 75784 recognizer_main.cc:113] total wav duration is: 36108.9 sec
I0417 08:07:15.300642 75784 recognizer_main.cc:114] total decode cost:10247.7 sec
I0417 08:07:15.300648 75784 recognizer_main.cc:115] total rescore cost:908.228 sec
I0417 08:07:15.300653 75784 recognizer_main.cc:116] RTF is: 0.283
```

@ -0,0 +1,36 @@
#!/bin/bash
# Run the ONNX (fastdeploy) recognizer over the AIShell test set in $nj
# parallel jobs, merge the per-shard results, and score them with
# character-level WER.  Outputs land under $exp.
set -e

# Defaults; overridable via --data/--exp/--nj thanks to parse_options below.
data=data
exp=exp
nj=20

. utils/parse_options.sh

mkdir -p "$exp"
ckpt_dir=./data/model
model_dir=$ckpt_dir/onnx_model/
aishell_wav_scp=aishell_test.scp
text=$data/test/text

# Shard the wav scp into $nj pieces, one per parallel job.
./local/split_data.sh "$data" "$data/$aishell_wav_scp" "$aishell_wav_scp" "$nj"

# Decode each shard; run.pl substitutes JOB with the shard index.
utils/run.pl JOB=1:$nj "$data/split${nj}/JOB/recognizer.fd.log" \
recognizer_main \
    --use_fbank=true \
    --num_bins=80 \
    --model_path=$model_dir \
    --word_symbol_table=$model_dir/unit.txt \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --with_onnx_model=true \
    --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
    --result_wspecifier=ark,t:$data/split${nj}/JOB/recognizer.fd.rsl.ark

# Merge shard transcripts and compute character error rate.
cat "$data"/split${nj}/*/recognizer.fd.rsl.ark > "$exp/aishell.recognizer.fd.rsl"
utils/compute-wer.py --char=1 --v=1 "$text" "$exp/aishell.recognizer.fd.rsl" > "$exp/aishell.recognizer.fd.err"

echo "recognizer fd test have finished!!!"
echo "please checkout in $exp/aishell.recognizer.fd.err"
tail -n 7 "$exp/aishell.recognizer.fd.err"

@ -0,0 +1,51 @@
#!/bin/bash
# Run the WFST (TLG-graph) recognizer over the AIShell test set in $nj
# parallel jobs, downloading a prebuilt n-gram TLG graph if absent, then
# merge per-shard results and score with character-level WER.
set -e

# Defaults; overridable via --data/--exp/--nj thanks to parse_options below.
data=data
exp=exp
nj=20

. utils/parse_options.sh

mkdir -p "$exp"
ckpt_dir=./data/model
# NOTE(review): model_dir points at onnx_model but --with_onnx_model is not
# passed below (unlike the fd sibling script) — confirm the intended backend.
model_dir=$ckpt_dir/onnx_model/
aishell_wav_scp=aishell_test.scp
text=$data/test/text

# Shard the wav scp into $nj pieces, one per parallel job.
./local/split_data.sh "$data" "$data/$aishell_wav_scp" "$aishell_wav_scp" "$nj"

lang_dir=./data/lang_test/
graph=$lang_dir/TLG.fst
word_table=$lang_dir/words.txt

if [ ! -f "$graph" ]; then
    # download ngram, if you want to make graph by yourself, please refer local/run_build_tlg.sh
    mkdir -p "$lang_dir"
    pushd "$lang_dir"
    wget -c https://paddlespeech.bj.bcebos.com/speechx/examples/ngram/zh/tlg.zip
    unzip tlg.zip
    popd
fi

# Decode each shard; run.pl substitutes JOB with the shard index.
utils/run.pl JOB=1:$nj "$data/split${nj}/JOB/recognizer_wfst_fd.log" \
recognizer_main \
    --use_fbank=true \
    --num_bins=80 \
    --model_path=$model_dir \
    --graph_path=$lang_dir/TLG.fst \
    --word_symbol_table=$word_table \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
    --rescoring_weight=0.0 \
    --acoustic_scale=2 \
    --result_wspecifier=ark,t:$data/split${nj}/JOB/result_recognizer_wfst_fd.ark

# Merge shard transcripts and compute character error rate.
cat "$data"/split${nj}/*/result_recognizer_wfst_fd.ark > "$exp/aishell_recognizer_wfst_fd"
utils/compute-wer.py --char=1 --v=1 "$text" "$exp/aishell_recognizer_wfst_fd" > "$exp/aishell.recognizer_wfst_fd.err"

echo "recognizer wfst fd test have finished!!!"
echo "please checkout in $exp/aishell.recognizer_wfst_fd.err"
tail -n 7 "$exp/aishell.recognizer_wfst_fd.err"
Loading…
Cancel
Save