PaddleSpeech/runtime/engine/asr/decoder/ctc_tlg_decoder.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "decoder/ctc_tlg_decoder.h"

namespace ppspeech {

// Builds a decoder from `opts`.
//
// Preconditions, each enforced with CHECK:
//   - opts.fst_ptr is non-null;
//   - opts.word_symbol_table is a non-empty file name;
//   - the symbol table named by opts.word_symbol_table can be loaded.
// The constructor finishes by calling Reset().
TLGDecoder::TLGDecoder(TLGDecoderOptions opts) : opts_(opts) {
    fst_ = opts.fst_ptr;
    CHECK(fst_ != nullptr);

    CHECK(!opts.word_symbol_table.empty());
    word_symbol_table_.reset(
        fst::SymbolTable::ReadText(opts.word_symbol_table));
    // ReadText yields a null pointer when the table cannot be read; fail here
    // instead of dereferencing a null table later when results are rendered.
    CHECK(word_symbol_table_ != nullptr);

    decoder_.reset(new kaldi::LatticeFasterOnlineDecoder(*fst_, opts.opts));

    Reset();
}

// Returns the decoder to its initial state: the underlying lattice decoder is
// re-initialized, the decoded-frame counter goes back to zero and every
// cached result container is emptied.
void TLGDecoder::Reset() {
    // Restart the search itself.
    decoder_->InitDecoding();
    num_frame_decoded_ = 0;

    // Drop results left over from a previous FinalizeSearch().
    times_.clear();
    olabels_.clear();
    likelihood_.clear();
    hypotheses_.clear();
}

void TLGDecoder::InitDecoder() { Reset(); }

// Decodes frame by frame until `decodable` reports that the index of the next
// frame to decode (num_frame_decoded_) is its last frame.
void TLGDecoder::AdvanceDecode(
    const std::shared_ptr<kaldi::DecodableInterface>& decodable) {
    kaldi::DecodableInterface* const source = decodable.get();
    for (;;) {
        if (source->IsLastFrame(num_frame_decoded_)) {
            break;
        }
        // Consumes one frame and increments num_frame_decoded_.
        AdvanceDecoding(source);
    }
}

// Asks the underlying decoder to process a single frame from `decodable`,
// then records that one more frame has been decoded.
void TLGDecoder::AdvanceDecoding(kaldi::DecodableInterface* decodable) {
    constexpr int kFramesPerStep = 1;
    decoder_->AdvanceDecoding(decodable, kFramesPerStep);
    ++num_frame_decoded_;
}


std::string TLGDecoder::GetPartialResult() {
    if (num_frame_decoded_ == 0) {
        // Assertion failed: (this->NumFramesDecoded() > 0 && "You cannot call
        // BestPathEnd if no frames were decoded.")
        return std::string("");
    }
    kaldi::Lattice lat;
    kaldi::LatticeWeight weight;
    std::vector<int> alignment;
    std::vector<int> words_id;
    decoder_->GetBestPath(&lat, false);
    fst::GetLinearSymbolSequence(lat, &alignment, &words_id, &weight);
    std::string words;
    for (int32 idx = 0; idx < words_id.size(); ++idx) {
        std::string word = word_symbol_table_->Find(words_id[idx]);
        words += word;
    }
    return words;
}

// Finishes decoding and rebuilds the cached n-best results.
//
// The decoder's lattice is reduced to at most opts_.nbest shortest paths. For
// every path, one entry is appended, in the same order, to each of:
//   hypotheses_  - input labels minus one, keeping only the last frame of
//                  each run of equal labels (label 0 is skipped, except that
//                  the final frame is always kept);
//   times_       - the frame index at which each kept label was taken
//                  (placeholder timing, see the inline note);
//   olabels_     - the output (word) label sequence of the path;
//   likelihood_  - the negated sum of the two path-weight components.
// All four containers are cleared first, so calling this repeatedly never
// leaves them with mismatched lengths.
void TLGDecoder::FinalizeSearch() {
    decoder_->FinalizeDecoding();
    kaldi::CompactLattice clat;
    decoder_->GetLattice(&clat, true);
    kaldi::Lattice lat, nbest_lat;
    fst::ConvertLattice(clat, &lat);
    fst::ShortestPath(lat, &nbest_lat, opts_.nbest);
    std::vector<kaldi::Lattice> nbest_lats;
    fst::ConvertNbestToVector(nbest_lat, &nbest_lats);

    hypotheses_.clear();
    hypotheses_.reserve(nbest_lats.size());
    likelihood_.clear();
    likelihood_.reserve(nbest_lats.size());
    times_.clear();
    times_.reserve(nbest_lats.size());
    // olabels_ is filled in lock-step with the containers above, so it has to
    // be emptied with them; otherwise a second call would leave stale entries
    // in front of the new ones.
    olabels_.clear();

    // Iterate by reference: copying a whole lattice per path is wasted work.
    for (const auto& path : nbest_lats) {
        kaldi::LatticeWeight weight;
        std::vector<int> hypothese;
        std::vector<int> time;
        std::vector<int> alignment;
        std::vector<int> words_id;
        fst::GetLinearSymbolSequence(path, &alignment, &words_id, &weight);

        // A signed frame count keeps the loop bound well-defined for an empty
        // alignment (size() - 1 on an unsigned size would wrap around).
        const int num_frames = static_cast<int>(alignment.size());
        for (int idx = 0; idx < num_frames; ++idx) {
            const bool is_last_frame = (idx + 1 == num_frames);
            if (!is_last_frame) {
                if (alignment[idx] == 0) continue;
                // Still inside a run of identical labels: wait for its end.
                if (alignment[idx] == alignment[idx + 1]) continue;
            }
            // The final frame is always emitted, matching prior behaviour.
            hypothese.push_back(alignment[idx] - 1);
            time.push_back(idx);  // fake time, todo later
        }

        hypotheses_.push_back(hypothese);
        times_.push_back(time);
        olabels_.push_back(words_id);
        likelihood_.push_back(-(weight.Value2() + weight.Value1()));
    }
}

std::string TLGDecoder::GetFinalBestPath() {
    if (num_frame_decoded_ == 0) {
        // Assertion failed: (this->NumFramesDecoded() > 0 && "You cannot call
        // BestPathEnd if no frames were decoded.")
        return std::string("");
    }
    kaldi::Lattice lat;
    kaldi::LatticeWeight weight;
    std::vector<int> alignment;
    std::vector<int> words_id;
    decoder_->GetBestPath(&lat, true);
    fst::GetLinearSymbolSequence(lat, &alignment, &words_id, &weight);
    std::string words;
    for (int32 idx = 0; idx < words_id.size(); ++idx) {
        std::string word = word_symbol_table_->Find(words_id[idx]);
        words += word;
    }
    return words;
}

}  // namespace ppspeech
make wfst work & align frame 3 years ago			`// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

add tlg decoder 3 years ago			`#include "decoder/ctc_tlg_decoder.h"`
[engine]add recognizer api && clean params && make a shared decoder resource (#3165) 2 years ago
add tlg decoder 3 years ago			`namespace ppspeech {`

add wfst decoder (#2886) 2 years ago			`TLGDecoder::TLGDecoder(TLGDecoderOptions opts) : opts_(opts) {`
[engine]add recognizer api && clean params && make a shared decoder resource (#3165) 2 years ago			`fst_ = opts.fst_ptr;`
add tlg decoder 3 years ago			`CHECK(fst_ != nullptr);`
ctc prefix beam search for u2, test can run 2 years ago
[engine]add recognizer api && clean params && make a shared decoder resource (#3165) 2 years ago			`CHECK(!opts.word_symbol_table.empty());`
make wfst work & align frame 3 years ago			`word_symbol_table_.reset(`
			`fst::SymbolTable::ReadText(opts.word_symbol_table));`
ctc prefix beam search for u2, test can run 2 years ago
add tlg decoder 3 years ago			`decoder_.reset(new kaldi::LatticeFasterOnlineDecoder(*fst_, opts.opts));`
ctc prefix beam search for u2, test can run 2 years ago
			`Reset();`
			`}`

			`void TLGDecoder::Reset() {`
add tlg decoder 3 years ago			`decoder_->InitDecoding();`
[engine] rename speechx (#2892) * rename speechx * fix wfst decode error * replace reset with make_unique 2 years ago			`hypotheses_.clear();`
			`likelihood_.clear();`
			`olabels_.clear();`
			`times_.clear();`

refactor ctc opts, extract decoder interface, add ctc beamsearch score 2 years ago			`num_frame_decoded_ = 0;`
ctc prefix beam search for u2, test can run 2 years ago			`return;`
add tlg decoder 3 years ago			`}`

format 2 years ago			`void TLGDecoder::InitDecoder() { Reset(); }`
add tlg decoder 3 years ago
make wfst work & align frame 3 years ago			`void TLGDecoder::AdvanceDecode(`
			`const std::shared_ptr<kaldi::DecodableInterface>& decodable) {`
refactor ctc opts, extract decoder interface, add ctc beamsearch score 2 years ago			`while (!decodable->IsLastFrame(num_frame_decoded_)) {`
make wfst work & align frame 3 years ago			`AdvanceDecoding(decodable.get());`
add tlg decoder 3 years ago			`}`
			`}`

			`void TLGDecoder::AdvanceDecoding(kaldi::DecodableInterface* decodable) {`
make wfst work & align frame 3 years ago			`decoder_->AdvanceDecoding(decodable, 1);`
refactor ctc opts, extract decoder interface, add ctc beamsearch score 2 years ago			`num_frame_decoded_++;`
add tlg decoder 3 years ago			`}`

ctc prefix beam search for u2, test can run 2 years ago
add partial result 3 years ago			`std::string TLGDecoder::GetPartialResult() {`
refactor ctc opts, extract decoder interface, add ctc beamsearch score 2 years ago			`if (num_frame_decoded_ == 0) {`
add partial result 3 years ago			`// Assertion failed: (this->NumFramesDecoded() > 0 && "You cannot call`
			`// BestPathEnd if no frames were decoded.")`
			`return std::string("");`
			`}`
			`kaldi::Lattice lat;`
			`kaldi::LatticeWeight weight;`
			`std::vector<int> alignment;`
			`std::vector<int> words_id;`
			`decoder_->GetBestPath(&lat, false);`
			`fst::GetLinearSymbolSequence(lat, &alignment, &words_id, &weight);`
			`std::string words;`
			`for (int32 idx = 0; idx < words_id.size(); ++idx) {`
			`std::string word = word_symbol_table_->Find(words_id[idx]);`
			`words += word;`
			`}`
fix #2013; and format 2 years ago			`return words;`
add partial result 3 years ago			`}`

add wfst decoder (#2886) 2 years ago			`void TLGDecoder::FinalizeSearch() {`
			`decoder_->FinalizeDecoding();`
			`kaldi::CompactLattice clat;`
			`decoder_->GetLattice(&clat, true);`
			`kaldi::Lattice lat, nbest_lat;`
			`fst::ConvertLattice(clat, &lat);`
			`fst::ShortestPath(lat, &nbest_lat, opts_.nbest);`
			`std::vector<kaldi::Lattice> nbest_lats;`
			`fst::ConvertNbestToVector(nbest_lat, &nbest_lats);`

			`hypotheses_.clear();`
			`hypotheses_.reserve(nbest_lats.size());`
			`likelihood_.clear();`
			`likelihood_.reserve(nbest_lats.size());`
			`times_.clear();`
			`times_.reserve(nbest_lats.size());`
			`for (auto lat : nbest_lats) {`
			`kaldi::LatticeWeight weight;`
			`std::vector<int> hypothese;`
			`std::vector<int> time;`
			`std::vector<int> alignment;`
			`std::vector<int> words_id;`
			`fst::GetLinearSymbolSequence(lat, &alignment, &words_id, &weight);`
			`int idx = 0;`
			`for (; idx < alignment.size() - 1; ++idx) {`
			`if (alignment[idx] == 0) continue;`
			`if (alignment[idx] != alignment[idx + 1]) {`
			`hypothese.push_back(alignment[idx] - 1);`
			`time.push_back(idx); // fake time, todo later`
			`}`
			`}`
			`hypothese.push_back(alignment[idx] - 1);`
			`time.push_back(idx); // fake time, todo later`
			`hypotheses_.push_back(hypothese);`
			`times_.push_back(time);`
[engine] rename speechx (#2892) * rename speechx * fix wfst decode error * replace reset with make_unique 2 years ago			`olabels_.push_back(words_id);`
add wfst decoder (#2886) 2 years ago			`likelihood_.push_back(-(weight.Value2() + weight.Value1()));`
			`}`
			`}`

add tlg decoder 3 years ago			`std::string TLGDecoder::GetFinalBestPath() {`
refactor ctc opts, extract decoder interface, add ctc beamsearch score 2 years ago			`if (num_frame_decoded_ == 0) {`
fix speechx core dump when stop immediately after start 3 years ago			`// Assertion failed: (this->NumFramesDecoded() > 0 && "You cannot call`
			`// BestPathEnd if no frames were decoded.")`
			`return std::string("");`
			`}`
make wfst work & align frame 3 years ago			`kaldi::Lattice lat;`
			`kaldi::LatticeWeight weight;`
			`std::vector<int> alignment;`
			`std::vector<int> words_id;`
			`decoder_->GetBestPath(&lat, true);`
			`fst::GetLinearSymbolSequence(lat, &alignment, &words_id, &weight);`
			`std::string words;`
			`for (int32 idx = 0; idx < words_id.size(); ++idx) {`
			`std::string word = word_symbol_table_->Find(words_id[idx]);`
			`words += word;`
			`}`
			`return words;`
add tlg decoder 3 years ago			`}`
ctc prefix beam search for u2, test can run 2 years ago
format 2 years ago			`} // namespace ppspeech`