PaddleSpeech/runtime/engine/asr/decoder/ctc_prefix_beam_search_deco...

// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu)
//               2022 Binbin Zhang (binbzha@qq.com)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "decoder/ctc_prefix_beam_search_decoder.h"

#include "base/common.h"
#include "decoder/ctc_beam_search_opt.h"
#include "decoder/ctc_prefix_beam_search_score.h"
#include "utils/math.h"

#ifdef WITH_PROFILING
#include "paddle/fluid/platform/profiler.h"
using paddle::platform::RecordEvent;
using paddle::platform::TracerEventType;
#endif

namespace ppspeech {

CTCPrefixBeamSearch::CTCPrefixBeamSearch(const CTCBeamSearchOptions& opts)
    : opts_(opts) {
    unit_table_ = std::shared_ptr<fst::SymbolTable>(
        fst::SymbolTable::ReadText(opts.word_symbol_table));
    CHECK(unit_table_ != nullptr);

    Reset();
}

void CTCPrefixBeamSearch::Reset() {
    num_frame_decoded_ = 0;

    cur_hyps_.clear();

    hypotheses_.clear();
    likelihood_.clear();
    viterbi_likelihood_.clear();
    times_.clear();
    outputs_.clear();

    // empty hyp with Score
    std::vector<int> empty;
    PrefixScore prefix_score;
    prefix_score.InitEmpty();
    cur_hyps_[empty] = prefix_score;

    outputs_.emplace_back(empty);
    hypotheses_.emplace_back(empty);
    likelihood_.emplace_back(prefix_score.TotalScore());
    times_.emplace_back(empty);
}

void CTCPrefixBeamSearch::InitDecoder() { Reset(); }

void CTCPrefixBeamSearch::AdvanceDecode(
    const std::shared_ptr<kaldi::DecodableInterface>& decodable) {
    double search_cost = 0.0;
    double feat_nnet_cost = 0.0;
    while (1) {
        // forward frame by frame
        kaldi::Timer timer;
        std::vector<kaldi::BaseFloat> frame_prob;
        bool flag = decodable->FrameLikelihood(num_frame_decoded_, &frame_prob);
        feat_nnet_cost += timer.Elapsed();
        if (flag == false) {
            VLOG(2) << "decoder advance decode exit." << frame_prob.size();
            break;
        }

        timer.Reset();
        std::vector<std::vector<kaldi::BaseFloat>> likelihood;
        likelihood.push_back(std::move(frame_prob));
        AdvanceDecoding(likelihood);
        search_cost += timer.Elapsed();

        VLOG(1) << "num_frame_decoded_: " << num_frame_decoded_;
    }
    VLOG(2) << "AdvanceDecode feat + forward  cost: " << feat_nnet_cost
            << " sec.";
    VLOG(2) << "AdvanceDecode search  cost: " << search_cost << " sec.";
}

static bool PrefixScoreCompare(
    const std::pair<std::vector<int>, PrefixScore>& a,
    const std::pair<std::vector<int>, PrefixScore>& b) {
    // log domain
    return a.second.TotalScore() > b.second.TotalScore();
}


void CTCPrefixBeamSearch::AdvanceDecoding(
    const std::vector<std::vector<kaldi::BaseFloat>>& logp) {
#ifdef WITH_PROFILING
    RecordEvent event("CtcPrefixBeamSearch::AdvanceDecoding",
                      TracerEventType::UserDefined,
                      1);
#endif

    if (logp.size() == 0) return;

    int first_beam_size =
        std::min(static_cast<int>(logp[0].size()), opts_.first_beam_size);

    for (int t = 0; t < logp.size(); ++t, ++num_frame_decoded_) {
        const std::vector<kaldi::BaseFloat>& logp_t = logp[t];
        std::unordered_map<std::vector<int>, PrefixScore, PrefixScoreHash>
            next_hyps;

        // 1. first beam prune, only select topk candidates
        std::vector<kaldi::BaseFloat> topk_score;
        std::vector<int32_t> topk_index;
        TopK(logp_t, first_beam_size, &topk_score, &topk_index);
        VLOG(2) << "topk: " << num_frame_decoded_ << " "
                << *std::max_element(logp_t.begin(), logp_t.end()) << " "
                << topk_score[0];
        for (int i = 0; i < topk_score.size(); i++) {
            VLOG(2) << "topk: " << num_frame_decoded_ << " " << topk_score[i];
        }

        // 2. token passing
        for (int i = 0; i < topk_index.size(); ++i) {
            int id = topk_index[i];
            auto prob = topk_score[i];

            for (const auto& it : cur_hyps_) {
                const std::vector<int>& prefix = it.first;
                const PrefixScore& prefix_score = it.second;

                // If prefix doesn't exist in next_hyps, next_hyps[prefix] will
                // insert
                // PrefixScore(-inf, -inf) by default, since the default
                // constructor
                // of PrefixScore will set fields b(blank ending Score) and
                // nb(none blank ending Score) to -inf, respectively.

                if (id == opts_.blank) {
                    // case 0: *a + <blank> => *a, *a<blank> + <blank> => *a,
                    // prefix not
                    // change
                    PrefixScore& next_score = next_hyps[prefix];
                    next_score.b =
                        LogSumExp(next_score.b, prefix_score.Score() + prob);

                    // timestamp, blank is slince, not effact timestamp
                    next_score.v_b = prefix_score.ViterbiScore() + prob;
                    next_score.times_b = prefix_score.Times();

                    // Prefix not changed, copy the context from pefix
                    if (context_graph_ && !next_score.has_context) {
                        next_score.CopyContext(prefix_score);
                        next_score.has_context = true;
                    }

                } else if (!prefix.empty() && id == prefix.back()) {
                    // case 1: *a + a => *a, prefix not changed
                    PrefixScore& next_score1 = next_hyps[prefix];
                    next_score1.nb =
                        LogSumExp(next_score1.nb, prefix_score.nb + prob);

                    // timestamp, non-blank symbol effact timestamp
                    if (next_score1.v_nb < prefix_score.v_nb + prob) {
                        // compute viterbi Score
                        next_score1.v_nb = prefix_score.v_nb + prob;
                        if (next_score1.cur_token_prob < prob) {
                            // store max token prob
                            next_score1.cur_token_prob = prob;
                            // update this timestamp as token appeared here.
                            next_score1.times_nb = prefix_score.times_nb;
                            assert(next_score1.times_nb.size() > 0);
                            next_score1.times_nb.back() = num_frame_decoded_;
                        }
                    }

                    // Prefix not changed, copy the context from pefix
                    if (context_graph_ && !next_score1.has_context) {
                        next_score1.CopyContext(prefix_score);
                        next_score1.has_context = true;
                    }

                    // case 2: *a<blank> + a => *aa, prefix changed.
                    std::vector<int> new_prefix(prefix);
                    new_prefix.emplace_back(id);
                    PrefixScore& next_score2 = next_hyps[new_prefix];
                    next_score2.nb =
                        LogSumExp(next_score2.nb, prefix_score.b + prob);

                    // timestamp, non-blank symbol effact timestamp
                    if (next_score2.v_nb < prefix_score.v_b + prob) {
                        // compute viterbi Score
                        next_score2.v_nb = prefix_score.v_b + prob;
                        // new token added
                        next_score2.cur_token_prob = prob;
                        next_score2.times_nb = prefix_score.times_b;
                        next_score2.times_nb.emplace_back(num_frame_decoded_);
                    }

                    // Prefix changed, calculate the context Score.
                    if (context_graph_ && !next_score2.has_context) {
                        next_score2.UpdateContext(
                            context_graph_, prefix_score, id, prefix.size());
                        next_score2.has_context = true;
                    }

                } else {
                    // id != prefix.back()
                    // case 3: *a + b => *ab, *a<blank> +b => *ab
                    std::vector<int> new_prefix(prefix);
                    new_prefix.emplace_back(id);
                    PrefixScore& next_score = next_hyps[new_prefix];
                    next_score.nb =
                        LogSumExp(next_score.nb, prefix_score.Score() + prob);

                    // timetamp, non-blank symbol effact timestamp
                    if (next_score.v_nb < prefix_score.ViterbiScore() + prob) {
                        next_score.v_nb = prefix_score.ViterbiScore() + prob;

                        next_score.cur_token_prob = prob;
                        next_score.times_nb = prefix_score.Times();
                        next_score.times_nb.emplace_back(num_frame_decoded_);
                    }

                    // Prefix changed, calculate the context Score.
                    if (context_graph_ && !next_score.has_context) {
                        next_score.UpdateContext(
                            context_graph_, prefix_score, id, prefix.size());
                        next_score.has_context = true;
                    }
                }
            }  // end for (const auto& it : cur_hyps_)
        }      // end for (int i = 0; i < topk_index.size(); ++i)

        // 3. second beam prune, only keep top n best paths
        std::vector<std::pair<std::vector<int>, PrefixScore>> arr(
            next_hyps.begin(), next_hyps.end());
        int second_beam_size =
            std::min(static_cast<int>(arr.size()), opts_.second_beam_size);
        std::nth_element(arr.begin(),
                         arr.begin() + second_beam_size,
                         arr.end(),
                         PrefixScoreCompare);
        arr.resize(second_beam_size);
        std::sort(arr.begin(), arr.end(), PrefixScoreCompare);

        // 4. update cur_hyps by next_hyps, and get new result
        UpdateHypotheses(arr);
    }  // end for (int t = 0; t < logp.size(); ++t, ++num_frame_decoded_)
}


void CTCPrefixBeamSearch::UpdateHypotheses(
    const std::vector<std::pair<std::vector<int>, PrefixScore>>& hyps) {
    cur_hyps_.clear();

    outputs_.clear();
    hypotheses_.clear();
    likelihood_.clear();
    viterbi_likelihood_.clear();
    times_.clear();

    for (auto& item : hyps) {
        cur_hyps_[item.first] = item.second;

        UpdateOutputs(item);
        hypotheses_.emplace_back(std::move(item.first));
        likelihood_.emplace_back(item.second.TotalScore());
        viterbi_likelihood_.emplace_back(item.second.ViterbiScore());
        times_.emplace_back(item.second.Times());
    }
}

void CTCPrefixBeamSearch::UpdateOutputs(
    const std::pair<std::vector<int>, PrefixScore>& prefix) {
    const std::vector<int>& input = prefix.first;
    const std::vector<int>& start_boundaries = prefix.second.start_boundaries;
    const std::vector<int>& end_boundaries = prefix.second.end_boundaries;

    // add <context> </context> tag
    std::vector<int> output;
    int s = 0;
    int e = 0;
    for (int i = 0; i < input.size(); ++i) {
        output.emplace_back(input[i]);
    }

    outputs_.emplace_back(output);
}

void CTCPrefixBeamSearch::FinalizeSearch() {
    UpdateFinalContext();

    VLOG(2) << "num_frame_decoded_: " << num_frame_decoded_;
    int cnt = 0;
    for (int i = 0; i < hypotheses_.size(); i++) {
        VLOG(2) << "hyp " << cnt << " len: " << hypotheses_[i].size()
                << " ctc score: " << likelihood_[i];
        for (int j = 0; j < hypotheses_[i].size(); j++) {
            VLOG(2) << hypotheses_[i][j];
        }
    }
}

void CTCPrefixBeamSearch::UpdateFinalContext() {
    if (context_graph_ == nullptr) return;

    CHECK(hypotheses_.size() == cur_hyps_.size());
    CHECK(hypotheses_.size() == likelihood_.size());

    // We should backoff the context Score/state when the context is
    // not fully matched at the last time.
    for (const auto& prefix : hypotheses_) {
        PrefixScore& prefix_score = cur_hyps_[prefix];
        if (prefix_score.context_score != 0) {
            prefix_score.UpdateContext(
                context_graph_, prefix_score, 0, prefix.size());
        }
    }
    std::vector<std::pair<std::vector<int>, PrefixScore>> arr(cur_hyps_.begin(),
                                                              cur_hyps_.end());
    std::sort(arr.begin(), arr.end(), PrefixScoreCompare);

    // Update cur_hyps_ and get new result
    UpdateHypotheses(arr);
}

std::string CTCPrefixBeamSearch::GetBestPath(int index) {
    int n_hyps = Outputs().size();
    CHECK_GT(n_hyps, 0);
    CHECK_LT(index, n_hyps);
    std::vector<int> one = Outputs()[index];
    std::string sentence;
    for (int i = 0; i < one.size(); i++) {
        sentence += unit_table_->Find(one[i]);
    }
    return sentence;
}

std::string CTCPrefixBeamSearch::GetBestPath() { return GetBestPath(0); }

std::vector<std::pair<double, std::string>> CTCPrefixBeamSearch::GetNBestPath(
    int n) {
    int hyps_size = hypotheses_.size();
    CHECK_GT(hyps_size, 0);

    int min_n = n == -1 ? hypotheses_.size() : std::min(n, hyps_size);

    std::vector<std::pair<double, std::string>> n_best;
    n_best.reserve(min_n);

    for (int i = 0; i < min_n; i++) {
        n_best.emplace_back(Likelihood()[i], GetBestPath(i));
    }
    return n_best;
}

std::vector<std::pair<double, std::string>>
CTCPrefixBeamSearch::GetNBestPath() {
    return GetNBestPath(-1);
}

std::string CTCPrefixBeamSearch::GetFinalBestPath() { return GetBestPath(); }

std::string CTCPrefixBeamSearch::GetPartialResult() { return GetBestPath(); }


}  // namespace ppspeech
ctc prefix beam search for u2, test can run 2 years ago			`// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu)`
			`// 2022 Binbin Zhang (binbzha@qq.com)`
refactor ctc opts, extract decoder interface, add ctc beamsearch score 2 years ago			`// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`
simple ctc prefix beam search compile ok 2 years ago

ctc prefix beam search for u2, test can run 2 years ago			`#include "decoder/ctc_prefix_beam_search_decoder.h"`
format 2 years ago
simple ctc prefix beam search compile ok 2 years ago			`#include "base/common.h"`
			`#include "decoder/ctc_beam_search_opt.h"`
			`#include "decoder/ctc_prefix_beam_search_score.h"`
			`#include "utils/math.h"`

[runtime] add logging module, build on linux and android, normalize option name (#2986) * rename option with WITH_; add logging module to replace with glog * refactor logging module; build pass on linux and andorid 2 years ago			`#ifdef WITH_PROFILING`
simple ctc prefix beam search compile ok 2 years ago			`#include "paddle/fluid/platform/profiler.h"`
			`using paddle::platform::RecordEvent;`
			`using paddle::platform::TracerEventType;`
			`#endif`

			`namespace ppspeech {`

[engine]add recognizer api && clean params && make a shared decoder resource (#3165) 2 years ago			`CTCPrefixBeamSearch::CTCPrefixBeamSearch(const CTCBeamSearchOptions& opts)`
simple ctc prefix beam search compile ok 2 years ago			`: opts_(opts) {`
format 2 years ago			`unit_table_ = std::shared_ptr<fst::SymbolTable>(`
[engine]add recognizer api && clean params && make a shared decoder resource (#3165) 2 years ago			`fst::SymbolTable::ReadText(opts.word_symbol_table));`
add u2 recg 2 years ago			`CHECK(unit_table_ != nullptr);`

ctc prefix beam search for u2, test can run 2 years ago			`Reset();`
simple ctc prefix beam search compile ok 2 years ago			`}`

format 2 years ago			`void CTCPrefixBeamSearch::Reset() {`
simple ctc prefix beam search compile ok 2 years ago			`num_frame_decoded_ = 0;`

			`cur_hyps_.clear();`

ctc prefix beam search for u2, test can run 2 years ago			`hypotheses_.clear();`
			`likelihood_.clear();`
			`viterbi_likelihood_.clear();`
			`times_.clear();`
			`outputs_.clear();`
simple ctc prefix beam search compile ok 2 years ago
ctc prefix beam search for u2, test can run 2 years ago			`// empty hyp with Score`
			`std::vector<int> empty;`
			`PrefixScore prefix_score;`
fix as comment 2 years ago			`prefix_score.InitEmpty();`
ctc prefix beam search for u2, test can run 2 years ago			`cur_hyps_[empty] = prefix_score;`
simple ctc prefix beam search compile ok 2 years ago
ctc prefix beam search for u2, test can run 2 years ago			`outputs_.emplace_back(empty);`
			`hypotheses_.emplace_back(empty);`
			`likelihood_.emplace_back(prefix_score.TotalScore());`
			`times_.emplace_back(empty);`
format 2 years ago			`}`
simple ctc prefix beam search compile ok 2 years ago
[engine] rename speechx (#2892) * rename speechx * fix wfst decode error * replace reset with make_unique 2 years ago			`void CTCPrefixBeamSearch::InitDecoder() { Reset(); }`
simple ctc prefix beam search compile ok 2 years ago
			`void CTCPrefixBeamSearch::AdvanceDecode(`
			`const std::shared_ptr<kaldi::DecodableInterface>& decodable) {`
[speechx] fix compile and add more doc (#2591) * update u2pp latest static graph * add requirement * add utils * update doc * update result * update result * update result * fix cmake * imporove scripts * update result * add quant model and script * add profiling timer 2 years ago			`double search_cost = 0.0;`
			`double feat_nnet_cost = 0.0;`
simple ctc prefix beam search compile ok 2 years ago			`while (1) {`
ctc prefix beam search for u2, test can run 2 years ago			`// forward frame by frame`
[speechx] fix compile and add more doc (#2591) * update u2pp latest static graph * add requirement * add utils * update doc * update result * update result * update result * fix cmake * imporove scripts * update result * add quant model and script * add profiling timer 2 years ago			`kaldi::Timer timer;`
simple ctc prefix beam search compile ok 2 years ago			`std::vector<kaldi::BaseFloat> frame_prob;`
			`bool flag = decodable->FrameLikelihood(num_frame_decoded_, &frame_prob);`
[speechx] fix compile and add more doc (#2591) * update u2pp latest static graph * add requirement * add utils * update doc * update result * update result * update result * fix cmake * imporove scripts * update result * add quant model and script * add profiling timer 2 years ago			`feat_nnet_cost += timer.Elapsed();`
more debug info 2 years ago			`if (flag == false) {`
[speechx] thread decode (#2839) * fix nnet thread crash && rescore cost time * add nnet thread main 2 years ago			`VLOG(2) << "decoder advance decode exit." << frame_prob.size();`
more debug info 2 years ago			`break;`
			`}`
ctc prefix beam search for u2, test can run 2 years ago
[speechx] fix compile and add more doc (#2591) * update u2pp latest static graph * add requirement * add utils * update doc * update result * update result * update result * fix cmake * imporove scripts * update result * add quant model and script * add profiling timer 2 years ago			`timer.Reset();`
simple ctc prefix beam search compile ok 2 years ago			`std::vector<std::vector<kaldi::BaseFloat>> likelihood;`
[speechx]Speechx directory refactor (#2746) * refactor directory 2 years ago			`likelihood.push_back(std::move(frame_prob));`
simple ctc prefix beam search compile ok 2 years ago			`AdvanceDecoding(likelihood);`
[speechx] fix compile and add more doc (#2591) * update u2pp latest static graph * add requirement * add utils * update doc * update result * update result * update result * fix cmake * imporove scripts * update result * add quant model and script * add profiling timer 2 years ago			`search_cost += timer.Elapsed();`

[speechx] thread decode (#2839) * fix nnet thread crash && rescore cost time * add nnet thread main 2 years ago			`VLOG(1) << "num_frame_decoded_: " << num_frame_decoded_;`
simple ctc prefix beam search compile ok 2 years ago			`}`
[engine] add wfst recognizer in example (#3173) * update wfst script * add skip blank 2 years ago			`VLOG(2) << "AdvanceDecode feat + forward cost: " << feat_nnet_cost`
[speechx] fix compile and add more doc (#2591) * update u2pp latest static graph * add requirement * add utils * update doc * update result * update result * update result * fix cmake * imporove scripts * update result * add quant model and script * add profiling timer 2 years ago			`<< " sec.";`
[engine] add wfst recognizer in example (#3173) * update wfst script * add skip blank 2 years ago			`VLOG(2) << "AdvanceDecode search cost: " << search_cost << " sec.";`
simple ctc prefix beam search compile ok 2 years ago			`}`

			`static bool PrefixScoreCompare(`
			`const std::pair<std::vector<int>, PrefixScore>& a,`
			`const std::pair<std::vector<int>, PrefixScore>& b) {`
ctc prefix beam search for u2, test can run 2 years ago			`// log domain`
			`return a.second.TotalScore() > b.second.TotalScore();`
simple ctc prefix beam search compile ok 2 years ago			`}`


ctc prefix beam search for u2, test can run 2 years ago			`void CTCPrefixBeamSearch::AdvanceDecoding(`
			`const std::vector<std::vector<kaldi::BaseFloat>>& logp) {`
[runtime] add logging module, build on linux and android, normalize option name (#2986) * rename option with WITH_; add logging module to replace with glog * refactor logging module; build pass on linux and andorid 2 years ago			`#ifdef WITH_PROFILING`
ctc prefix beam search for u2, test can run 2 years ago			`RecordEvent event("CtcPrefixBeamSearch::AdvanceDecoding",`
			`TracerEventType::UserDefined,`
			`1);`
simple ctc prefix beam search compile ok 2 years ago			`#endif`

ctc prefix beam search for u2, test can run 2 years ago			`if (logp.size() == 0) return;`

			`int first_beam_size =`
			`std::min(static_cast<int>(logp[0].size()), opts_.first_beam_size);`

			`for (int t = 0; t < logp.size(); ++t, ++num_frame_decoded_) {`
fix as comment 2 years ago			`const std::vector<kaldi::BaseFloat>& logp_t = logp[t];`
ctc prefix beam search for u2, test can run 2 years ago			`std::unordered_map<std::vector<int>, PrefixScore, PrefixScoreHash>`
			`next_hyps;`

			`// 1. first beam prune, only select topk candidates`
fix as comment 2 years ago			`std::vector<kaldi::BaseFloat> topk_score;`
ctc prefix beam search for u2, test can run 2 years ago			`std::vector<int32_t> topk_index;`
			`TopK(logp_t, first_beam_size, &topk_score, &topk_index);`
format code 2 years ago			`VLOG(2) << "topk: " << num_frame_decoded_ << " "`
			`<< *std::max_element(logp_t.begin(), logp_t.end()) << " "`
			`<< topk_score[0];`
			`for (int i = 0; i < topk_score.size(); i++) {`
			`VLOG(2) << "topk: " << num_frame_decoded_ << " " << topk_score[i];`
more debug info 2 years ago			`}`
format code 2 years ago
ctc prefix beam search for u2, test can run 2 years ago			`// 2. token passing`
			`for (int i = 0; i < topk_index.size(); ++i) {`
			`int id = topk_index[i];`
			`auto prob = topk_score[i];`

			`for (const auto& it : cur_hyps_) {`
			`const std::vector<int>& prefix = it.first;`
			`const PrefixScore& prefix_score = it.second;`

			`// If prefix doesn't exist in next_hyps, next_hyps[prefix] will`
			`// insert`
			`// PrefixScore(-inf, -inf) by default, since the default`
			`// constructor`
			`// of PrefixScore will set fields b(blank ending Score) and`
			`// nb(none blank ending Score) to -inf, respectively.`

			`if (id == opts_.blank) {`
			`// case 0: a + <blank> => a, a<blank> + <blank> => a,`
			`// prefix not`
			`// change`
			`PrefixScore& next_score = next_hyps[prefix];`
			`next_score.b =`
			`LogSumExp(next_score.b, prefix_score.Score() + prob);`

			`// timestamp, blank is slince, not effact timestamp`
			`next_score.v_b = prefix_score.ViterbiScore() + prob;`
			`next_score.times_b = prefix_score.Times();`

			`// Prefix not changed, copy the context from pefix`
			`if (context_graph_ && !next_score.has_context) {`
			`next_score.CopyContext(prefix_score);`
			`next_score.has_context = true;`
			`}`

			`} else if (!prefix.empty() && id == prefix.back()) {`
			`// case 1: a + a => a, prefix not changed`
			`PrefixScore& next_score1 = next_hyps[prefix];`
			`next_score1.nb =`
			`LogSumExp(next_score1.nb, prefix_score.nb + prob);`

			`// timestamp, non-blank symbol effact timestamp`
			`if (next_score1.v_nb < prefix_score.v_nb + prob) {`
			`// compute viterbi Score`
			`next_score1.v_nb = prefix_score.v_nb + prob;`
			`if (next_score1.cur_token_prob < prob) {`
			`// store max token prob`
			`next_score1.cur_token_prob = prob;`
			`// update this timestamp as token appeared here.`
			`next_score1.times_nb = prefix_score.times_nb;`
			`assert(next_score1.times_nb.size() > 0);`
			`next_score1.times_nb.back() = num_frame_decoded_;`
			`}`
			`}`

			`// Prefix not changed, copy the context from pefix`
			`if (context_graph_ && !next_score1.has_context) {`
			`next_score1.CopyContext(prefix_score);`
			`next_score1.has_context = true;`
			`}`

			`// case 2: a<blank> + a => aa, prefix changed.`
			`std::vector<int> new_prefix(prefix);`
			`new_prefix.emplace_back(id);`
			`PrefixScore& next_score2 = next_hyps[new_prefix];`
			`next_score2.nb =`
			`LogSumExp(next_score2.nb, prefix_score.b + prob);`

			`// timestamp, non-blank symbol effact timestamp`
			`if (next_score2.v_nb < prefix_score.v_b + prob) {`
			`// compute viterbi Score`
			`next_score2.v_nb = prefix_score.v_b + prob;`
			`// new token added`
			`next_score2.cur_token_prob = prob;`
			`next_score2.times_nb = prefix_score.times_b;`
			`next_score2.times_nb.emplace_back(num_frame_decoded_);`
			`}`

			`// Prefix changed, calculate the context Score.`
			`if (context_graph_ && !next_score2.has_context) {`
			`next_score2.UpdateContext(`
			`context_graph_, prefix_score, id, prefix.size());`
			`next_score2.has_context = true;`
			`}`

			`} else {`
			`// id != prefix.back()`
			`// case 3: a + b => ab, a<blank> +b => ab`
			`std::vector<int> new_prefix(prefix);`
			`new_prefix.emplace_back(id);`
			`PrefixScore& next_score = next_hyps[new_prefix];`
			`next_score.nb =`
			`LogSumExp(next_score.nb, prefix_score.Score() + prob);`

			`// timetamp, non-blank symbol effact timestamp`
			`if (next_score.v_nb < prefix_score.ViterbiScore() + prob) {`
			`next_score.v_nb = prefix_score.ViterbiScore() + prob;`

			`next_score.cur_token_prob = prob;`
			`next_score.times_nb = prefix_score.Times();`
			`next_score.times_nb.emplace_back(num_frame_decoded_);`
			`}`

			`// Prefix changed, calculate the context Score.`
			`if (context_graph_ && !next_score.has_context) {`
			`next_score.UpdateContext(`
			`context_graph_, prefix_score, id, prefix.size());`
			`next_score.has_context = true;`
			`}`
			`}`
			`} // end for (const auto& it : cur_hyps_)`
			`} // end for (int i = 0; i < topk_index.size(); ++i)`

			`// 3. second beam prune, only keep top n best paths`
			`std::vector<std::pair<std::vector<int>, PrefixScore>> arr(`
			`next_hyps.begin(), next_hyps.end());`
			`int second_beam_size =`
			`std::min(static_cast<int>(arr.size()), opts_.second_beam_size);`
			`std::nth_element(arr.begin(),`
			`arr.begin() + second_beam_size,`
			`arr.end(),`
			`PrefixScoreCompare);`
			`arr.resize(second_beam_size);`
			`std::sort(arr.begin(), arr.end(), PrefixScoreCompare);`

			`// 4. update cur_hyps by next_hyps, and get new result`
			`UpdateHypotheses(arr);`
			`} // end for (int t = 0; t < logp.size(); ++t, ++num_frame_decoded_)`
simple ctc prefix beam search compile ok 2 years ago			`}`


			`void CTCPrefixBeamSearch::UpdateHypotheses(`
			`const std::vector<std::pair<std::vector<int>, PrefixScore>>& hyps) {`
ctc prefix beam search for u2, test can run 2 years ago			`cur_hyps_.clear();`

			`outputs_.clear();`
			`hypotheses_.clear();`
			`likelihood_.clear();`
			`viterbi_likelihood_.clear();`
			`times_.clear();`

			`for (auto& item : hyps) {`
			`cur_hyps_[item.first] = item.second;`

			`UpdateOutputs(item);`
			`hypotheses_.emplace_back(std::move(item.first));`
			`likelihood_.emplace_back(item.second.TotalScore());`
			`viterbi_likelihood_.emplace_back(item.second.ViterbiScore());`
			`times_.emplace_back(item.second.Times());`
			`}`
simple ctc prefix beam search compile ok 2 years ago			`}`

ctc prefix beam search for u2, test can run 2 years ago			`void CTCPrefixBeamSearch::UpdateOutputs(`
			`const std::pair<std::vector<int>, PrefixScore>& prefix) {`
			`const std::vector<int>& input = prefix.first;`
			`const std::vector<int>& start_boundaries = prefix.second.start_boundaries;`
			`const std::vector<int>& end_boundaries = prefix.second.end_boundaries;`

			`// add <context> </context> tag`
			`std::vector<int> output;`
			`int s = 0;`
			`int e = 0;`
			`for (int i = 0; i < input.size(); ++i) {`
			`output.emplace_back(input[i]);`
			`}`
simple ctc prefix beam search compile ok 2 years ago
ctc prefix beam search for u2, test can run 2 years ago			`outputs_.emplace_back(output);`
			`}`

format code 2 years ago			`void CTCPrefixBeamSearch::FinalizeSearch() {`
			`UpdateFinalContext();`

more debug info 2 years ago			`VLOG(2) << "num_frame_decoded_: " << num_frame_decoded_;`
			`int cnt = 0;`
format code 2 years ago			`for (int i = 0; i < hypotheses_.size(); i++) {`
			`VLOG(2) << "hyp " << cnt << " len: " << hypotheses_[i].size()`
			`<< " ctc score: " << likelihood_[i];`
			`for (int j = 0; j < hypotheses_[i].size(); j++) {`
			`VLOG(2) << hypotheses_[i][j];`
more debug info 2 years ago			`}`
			`}`
			`}`
simple ctc prefix beam search compile ok 2 years ago
			`void CTCPrefixBeamSearch::UpdateFinalContext() {`
ctc prefix beam search for u2, test can run 2 years ago			`if (context_graph_ == nullptr) return;`

			`CHECK(hypotheses_.size() == cur_hyps_.size());`
			`CHECK(hypotheses_.size() == likelihood_.size());`

			`// We should backoff the context Score/state when the context is`
			`// not fully matched at the last time.`
			`for (const auto& prefix : hypotheses_) {`
			`PrefixScore& prefix_score = cur_hyps_[prefix];`
			`if (prefix_score.context_score != 0) {`
format 2 years ago			`prefix_score.UpdateContext(`
			`context_graph_, prefix_score, 0, prefix.size());`
ctc prefix beam search for u2, test can run 2 years ago			`}`
simple ctc prefix beam search compile ok 2 years ago			`}`
ctc prefix beam search for u2, test can run 2 years ago			`std::vector<std::pair<std::vector<int>, PrefixScore>> arr(cur_hyps_.begin(),`
			`cur_hyps_.end());`
			`std::sort(arr.begin(), arr.end(), PrefixScoreCompare);`

			`// Update cur_hyps_ and get new result`
			`UpdateHypotheses(arr);`
			`}`

format 2 years ago			`std::string CTCPrefixBeamSearch::GetBestPath(int index) {`
ctc prefix beam search for u2, test can run 2 years ago			`int n_hyps = Outputs().size();`
cpplint 2 years ago			`CHECK_GT(n_hyps, 0);`
			`CHECK_LT(index, n_hyps);`
ctc prefix beam search for u2, test can run 2 years ago			`std::vector<int> one = Outputs()[index];`
add u2 recg 2 years ago			`std::string sentence;`
format 2 years ago			`for (int i = 0; i < one.size(); i++) {`
add u2 recg 2 years ago			`sentence += unit_table_->Find(one[i]);`
			`}`
			`return sentence;`
format 2 years ago			`}`
ctc prefix beam search for u2, test can run 2 years ago
format 2 years ago			`std::string CTCPrefixBeamSearch::GetBestPath() { return GetBestPath(0); }`
ctc prefix beam search for u2, test can run 2 years ago
format 2 years ago			`std::vector<std::pair<double, std::string>> CTCPrefixBeamSearch::GetNBestPath(`
			`int n) {`
			`int hyps_size = hypotheses_.size();`
cpplint 2 years ago			`CHECK_GT(hyps_size, 0);`
ctc prefix beam search for u2, test can run 2 years ago
format 2 years ago			`int min_n = n == -1 ? hypotheses_.size() : std::min(n, hyps_size);`
ctc prefix beam search for u2, test can run 2 years ago
format 2 years ago			`std::vector<std::pair<double, std::string>> n_best;`
			`n_best.reserve(min_n);`
ctc prefix beam search for u2, test can run 2 years ago
format 2 years ago			`for (int i = 0; i < min_n; i++) {`
			`n_best.emplace_back(Likelihood()[i], GetBestPath(i));`
			`}`
			`return n_best;`
			`}`
ctc prefix beam search for u2, test can run 2 years ago
format 2 years ago			`std::vector<std::pair<double, std::string>>`
			`CTCPrefixBeamSearch::GetNBestPath() {`
ctc prefix beam search for u2, test can run 2 years ago			`return GetNBestPath(-1);`
			`}`

format 2 years ago			`std::string CTCPrefixBeamSearch::GetFinalBestPath() { return GetBestPath(); }`

			`std::string CTCPrefixBeamSearch::GetPartialResult() { return GetBestPath(); }`
simple ctc prefix beam search compile ok 2 years ago

fix as comment 2 years ago			`} // namespace ppspeech`