PaddleSpeech/runtime/engine/asr/nnet/nnet_producer.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "nnet/nnet_producer.h"

#include <algorithm>
#include <cmath>

#include "matrix/kaldi-matrix.h"

namespace ppspeech {

using kaldi::BaseFloat;
using std::vector;

// Binds the producer to its collaborators and then calls Reset().
//
// nnet:            network used by Compute() (FeedForward) and by
//                  AttentionRescoring().
// frontend:        feature source fed by Accept() and read by Compute().
// blank_threshold: Compute() holds back a frame when exp(logprob[0]) is
//                  greater than this value.
NnetProducer::NnetProducer(std::shared_ptr<NnetBase> nnet,
                           std::shared_ptr<FrontendInterface> frontend,
                           float blank_threshold)
    : nnet_(nnet), frontend_(frontend), blank_threshold_(blank_threshold) {
    Reset();
}

// Hands `inputs` to the frontend unchanged; no network computation happens
// here. frontend_ is dereferenced without a null check.
// NOTE(review): whether `inputs` holds raw samples or features presumably
// depends on the concrete FrontendInterface -- confirm against the caller.
void NnetProducer::Accept(const std::vector<kaldi::BaseFloat>& inputs) {
    frontend_->Accept(inputs);
}

// Pushes the contents of `likelihood` into the cache, one vector per matrix
// row, without going through the frontend or the network.
void NnetProducer::Acceptlikelihood(
    const kaldi::Matrix<BaseFloat>& likelihood) {
    const size_t num_rows = likelihood.NumRows();
    const size_t num_cols = likelihood.NumCols();

    // Single scratch buffer, overwritten for every row before it is pushed.
    std::vector<BaseFloat> row(num_cols);
    for (size_t r = 0; r < num_rows; ++r) {
        for (size_t c = 0; c < num_cols; ++c) {
            row[c] = likelihood(r, c);
        }
        cache_.push_back(row);
    }
}

// Pops one entry from the cache into `*nnet_prob` and returns whatever
// cache_.pop() reports.
// NOTE(review): presumably false means nothing was available -- confirm
// against the cache implementation.
bool NnetProducer::Read(std::vector<kaldi::BaseFloat>* nnet_prob) {
    return cache_.pop(nnet_prob);
}

bool NnetProducer::Compute() {
    vector<BaseFloat> features;
    if (frontend_ == NULL || frontend_->Read(&features) == false) {
        // no feat or frontend_ not init.
        if (frontend_->IsFinished() == true) {
            finished_ = true;
        }
        return false;
    }
    CHECK_GE(frontend_->Dim(), 0);
    VLOG(1) << "Forward in " << features.size() / frontend_->Dim() << " feats.";

    NnetOut out;
    nnet_->FeedForward(features, frontend_->Dim(), &out);
    int32& vocab_dim = out.vocab_dim;
    size_t nframes = out.logprobs.size() / vocab_dim;
    VLOG(1) << "Forward out " << nframes << " decoder frames.";
    for (size_t idx = 0; idx < nframes; ++idx) {
        std::vector<BaseFloat> logprob(
            out.logprobs.data() + idx * vocab_dim,
            out.logprobs.data() + (idx + 1) * vocab_dim);
        // process blank prob
        float blank_prob = std::exp(logprob[0]);
        if (blank_prob > blank_threshold_) {
            last_frame_logprob_ = logprob;
            is_last_frame_skip_ = true;
            continue;
        } else {
            int cur_max = std::max(logprob.begin(), logprob.end()) - logprob.begin();
            if (cur_max == last_max_elem_ && cur_max != 0 && is_last_frame_skip_) {
                cache_.push_back(last_frame_logprob_);
                last_max_elem_ = cur_max;
            }
            last_max_elem_ = cur_max;
            is_last_frame_skip_ = false; 
            cache_.push_back(logprob);
        }
    }
    return true;
}

// Delegates rescoring to the network: `hyps`, `reverse_weight` and
// `rescoring_score` are passed straight through to
// nnet_->AttentionRescoring() and are not inspected here.
// NOTE(review): presumably one score is written per hypothesis in `hyps` --
// confirm against NnetBase.
void NnetProducer::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
                                      float reverse_weight,
                                      std::vector<float>* rescoring_score) {
    nnet_->AttentionRescoring(hyps, reverse_weight, rescoring_score);
}

}  // namespace ppspeech
[Speechx] add nnet prob cache && make 2 thread decode work (#2769) * add nnet cache && make 2 thread work * do not compile websocket 2 years ago			`// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

			`#include "nnet/nnet_producer.h"`
[runtime] optimization compile and add vad interface (#3026) * vad recipe ok * refactor vad, add vad conf, vad inerface, vad recipe * format * install vad lib/bin/inc * using cpack * add vad doc, fix vad state name * add comment * refactor fastdeploy download * add vad jni; format code * add timer; compute vad rtf; vad add beam param * andorid find library * fix log; add vad rtf * fix glog * fix BUILD_TYPE bug * update doc * rm jni 2 years ago
[speechx] rm openblas && refactor kaldi-matrix, kaldi-vector (#2824) * rm openblas && refactor kaldi-matrix kaldi-vector 2 years ago			`#include "matrix/kaldi-matrix.h"`
[Speechx] add nnet prob cache && make 2 thread decode work (#2769) * add nnet cache && make 2 thread work * do not compile websocket 2 years ago
			`namespace ppspeech {`

			`using kaldi::BaseFloat;`
[runtime] optimization compile and add vad interface (#3026) * vad recipe ok * refactor vad, add vad conf, vad inerface, vad recipe * format * install vad lib/bin/inc * using cpack * add vad doc, fix vad state name * add comment * refactor fastdeploy download * add vad jni; format code * add timer; compute vad rtf; vad add beam param * andorid find library * fix log; add vad rtf * fix glog * fix BUILD_TYPE bug * update doc * rm jni 2 years ago			`using std::vector;`
[Speechx] add nnet prob cache && make 2 thread decode work (#2769) * add nnet cache && make 2 thread work * do not compile websocket 2 years ago
			`NnetProducer::NnetProducer(std::shared_ptr<NnetBase> nnet,`
[engine] add wfst recognizer in example (#3173) * update wfst script * add skip blank 2 years ago			`std::shared_ptr<FrontendInterface> frontend,`
			`float blank_threshold)`
			`: nnet_(nnet), frontend_(frontend), blank_threshold_(blank_threshold) {`
add wfst decoder (#2886) 2 years ago			`Reset();`
			`}`
[Speechx] add nnet prob cache && make 2 thread decode work (#2769) * add nnet cache && make 2 thread work * do not compile websocket 2 years ago
[speechx]add kaldi-native-fbank && refactor frontend (#2794) * replace kaldi-fbank with kaldi-native-fbank * make kaldi-native-fbank work 2 years ago			`void NnetProducer::Accept(const std::vector<kaldi::BaseFloat>& inputs) {`
[Speechx] add nnet prob cache && make 2 thread decode work (#2769) * add nnet cache && make 2 thread work * do not compile websocket 2 years ago			`frontend_->Accept(inputs);`
			`}`

			`void NnetProducer::Acceptlikelihood(`
			`const kaldi::Matrix<BaseFloat>& likelihood) {`
			`std::vector<BaseFloat> prob;`
			`prob.resize(likelihood.NumCols());`
			`for (size_t idx = 0; idx < likelihood.NumRows(); ++idx) {`
			`for (size_t col = 0; col < likelihood.NumCols(); ++col) {`
			`prob[col] = likelihood(idx, col);`
			`}`
[speechx] thread decode (#2839) * fix nnet thread crash && rescore cost time * add nnet thread main 2 years ago			`cache_.push_back(prob);`
[Speechx] add nnet prob cache && make 2 thread decode work (#2769) * add nnet cache && make 2 thread work * do not compile websocket 2 years ago			`}`
			`}`

			`bool NnetProducer::Read(std::vector<kaldi::BaseFloat>* nnet_prob) {`
[speechx] thread decode (#2839) * fix nnet thread crash && rescore cost time * add nnet thread main 2 years ago			`bool flag = cache_.pop(nnet_prob);`
[Speechx] add nnet prob cache && make 2 thread decode work (#2769) * add nnet cache && make 2 thread work * do not compile websocket 2 years ago			`return flag;`
			`}`

			`bool NnetProducer::Compute() {`
[speechx]add kaldi-native-fbank && refactor frontend (#2794) * replace kaldi-fbank with kaldi-native-fbank * make kaldi-native-fbank work 2 years ago			`vector<BaseFloat> features;`
[Speechx] add nnet prob cache && make 2 thread decode work (#2769) * add nnet cache && make 2 thread work * do not compile websocket 2 years ago			`if (frontend_ == NULL \|\| frontend_->Read(&features) == false) {`
			`// no feat or frontend_ not init.`
[Engine] recognizer controller refactor (#3139) * refactor recognizer_controller * clean frontend file 2 years ago			`if (frontend_->IsFinished() == true) {`
			`finished_ = true;`
			`}`
[Speechx] add nnet prob cache && make 2 thread decode work (#2769) * add nnet cache && make 2 thread work * do not compile websocket 2 years ago			`return false;`
			`}`
			`CHECK_GE(frontend_->Dim(), 0);`
[speechx] thread decode (#2839) * fix nnet thread crash && rescore cost time * add nnet thread main 2 years ago			`VLOG(1) << "Forward in " << features.size() / frontend_->Dim() << " feats.";`
[Speechx] add nnet prob cache && make 2 thread decode work (#2769) * add nnet cache && make 2 thread work * do not compile websocket 2 years ago
			`NnetOut out;`
			`nnet_->FeedForward(features, frontend_->Dim(), &out);`
			`int32& vocab_dim = out.vocab_dim;`
[speechx]add kaldi-native-fbank && refactor frontend (#2794) * replace kaldi-fbank with kaldi-native-fbank * make kaldi-native-fbank work 2 years ago			`size_t nframes = out.logprobs.size() / vocab_dim;`
[speechx] thread decode (#2839) * fix nnet thread crash && rescore cost time * add nnet thread main 2 years ago			`VLOG(1) << "Forward out " << nframes << " decoder frames.";`
[Speechx] add nnet prob cache && make 2 thread decode work (#2769) * add nnet cache && make 2 thread work * do not compile websocket 2 years ago			`for (size_t idx = 0; idx < nframes; ++idx) {`
[speechx]add kaldi-native-fbank && refactor frontend (#2794) * replace kaldi-fbank with kaldi-native-fbank * make kaldi-native-fbank work 2 years ago			`std::vector<BaseFloat> logprob(`
			`out.logprobs.data() + idx * vocab_dim,`
			`out.logprobs.data() + (idx + 1) * vocab_dim);`
[engine] add wfst recognizer in example (#3173) * update wfst script * add skip blank 2 years ago			`// process blank prob`
			`float blank_prob = std::exp(logprob[0]);`
			`if (blank_prob > blank_threshold_) {`
			`last_frame_logprob_ = logprob;`
			`is_last_frame_skip_ = true;`
			`continue;`
			`} else {`
			`int cur_max = std::max(logprob.begin(), logprob.end()) - logprob.begin();`
			`if (cur_max == last_max_elem_ && cur_max != 0 && is_last_frame_skip_) {`
			`cache_.push_back(last_frame_logprob_);`
			`last_max_elem_ = cur_max;`
			`}`
			`last_max_elem_ = cur_max;`
			`is_last_frame_skip_ = false;`
			`cache_.push_back(logprob);`
			`}`
[Speechx] add nnet prob cache && make 2 thread decode work (#2769) * add nnet cache && make 2 thread work * do not compile websocket 2 years ago			`}`
			`return true;`
			`}`

			`void NnetProducer::AttentionRescoring(const std::vector<std::vector<int>>& hyps,`
			`float reverse_weight,`
			`std::vector<float>* rescoring_score) {`
			`nnet_->AttentionRescoring(hyps, reverse_weight, rescoring_score);`
			`}`

[speechx]add kaldi-native-fbank && refactor frontend (#2794) * replace kaldi-fbank with kaldi-native-fbank * make kaldi-native-fbank work 2 years ago			`} // namespace ppspeech`