PaddleSpeech/third_party/ctc_decoders/ctc_beam_search_decoder.h

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef CTC_BEAM_SEARCH_DECODER_H_
#define CTC_BEAM_SEARCH_DECODER_H_

#include <string>
#include <utility>
#include <vector>

#include "scorer.h"

/* CTC Beam Search Decoder

 * Parameters:
 *     probs_seq: 2-D vector that each element is a vector of probabilities
 *               over vocabulary of one time step.
 *     vocabulary: A vector of vocabulary.
 *     beam_size: The width of beam search.
 *     cutoff_prob: Cutoff probability for pruning.
 *     cutoff_top_n: Cutoff number for pruning.
 *     ext_scorer: External scorer to evaluate a prefix, which consists of
 *                 n-gram language model scoring and word insertion term.
 *                 Default null, decoding the input sample without scorer.
 * Return:
 *     A vector that each element is a pair of score  and decoding result,
 *     in desending order.
*/
std::vector<std::pair<double, std::string>> ctc_beam_search_decoding(
    const std::vector<std::vector<double>> &probs_seq,
    const std::vector<std::string> &vocabulary,
    size_t beam_size,
    double cutoff_prob = 1.0,
    size_t cutoff_top_n = 40,
    Scorer *ext_scorer = nullptr,
    size_t blank_id = 0);


/* CTC Beam Search Decoder for batch data

 * Parameters:
 *     probs_seq: 3-D vector that each element is a 2-D vector that can be used
 *                by ctc_beam_search_decoder().
 *     vocabulary: A vector of vocabulary.
 *     beam_size: The width of beam search.
 *     num_processes: Number of threads for beam search.
 *     cutoff_prob: Cutoff probability for pruning.
 *     cutoff_top_n: Cutoff number for pruning.
 *     ext_scorer: External scorer to evaluate a prefix, which consists of
 *                 n-gram language model scoring and word insertion term.
 *                 Default null, decoding the input sample without scorer.
 * Return:
 *     A 2-D vector that each element is a vector of beam search decoding
 *     result for one audio sample.
*/
std::vector<std::vector<std::pair<double, std::string>>>
ctc_beam_search_decoding_batch(
    const std::vector<std::vector<std::vector<double>>> &probs_split,
    const std::vector<std::string> &vocabulary,
    size_t beam_size,
    size_t num_processes,
    double cutoff_prob = 1.0,
    size_t cutoff_top_n = 40,
    Scorer *ext_scorer = nullptr,
    size_t blank_id = 0);

/**
 * Store the root and prefixes for decoder
 */

class CtcBeamSearchDecoderStorage {
  public:
    PathTrie *root = nullptr;
    std::vector<PathTrie *> prefixes;

    CtcBeamSearchDecoderStorage() {
        // init prefixes' root
        this->root = new PathTrie();
        this->root->log_prob_b_prev = 0.0;
        // The score of root is in log scale.Since the prob=1.0, the prob score
        // in log scale is 0.0
        this->root->score = root->log_prob_b_prev;
        // std::vector<PathTrie *> prefixes;
        this->prefixes.push_back(root);
    };

    ~CtcBeamSearchDecoderStorage() {
        if (root != nullptr) {
            delete root;
            root = nullptr;
        }
    };
};

/**
 * The ctc beam search decoder, support batchsize >= 1
 */
class CtcBeamSearchDecoderBatch {
  public:
    CtcBeamSearchDecoderBatch(const std::vector<std::string> &vocabulary,
                              size_t batch_size,
                              size_t beam_size,
                              size_t num_processes,
                              double cutoff_prob,
                              size_t cutoff_top_n,
                              Scorer *ext_scorer,
                              size_t blank_id);

    ~CtcBeamSearchDecoderBatch();
    void next(const std::vector<std::vector<std::vector<double>>> &probs_split,
              const std::vector<std::string> &has_value);

    std::vector<std::vector<std::pair<double, std::string>>> decode();

    void reset_state(size_t batch_size,
                     size_t beam_size,
                     size_t num_processes,
                     double cutoff_prob,
                     size_t cutoff_top_n);

  private:
    std::vector<std::string> vocabulary;
    size_t batch_size;
    size_t beam_size;
    size_t num_processes;
    double cutoff_prob;
    size_t cutoff_top_n;
    Scorer *ext_scorer;
    size_t blank_id;
    std::vector<std::unique_ptr<CtcBeamSearchDecoderStorage>>
        decoder_storage_vector;
};

/**
 * function for chunk decoding
 */
void ctc_beam_search_decode_chunk(
    PathTrie *root,
    std::vector<PathTrie *> &prefixes,
    const std::vector<std::vector<double>> &probs_seq,
    const std::vector<std::string> &vocabulary,
    size_t beam_size,
    double cutoff_prob,
    size_t cutoff_top_n,
    Scorer *ext_scorer,
    size_t blank_id);

std::vector<std::pair<double, std::string>> get_decode_result(
    std::vector<PathTrie *> &prefixes,
    const std::vector<std::string> &vocabulary,
    size_t beam_size,
    Scorer *ext_scorer);

/**
 * free the CtcBeamSearchDecoderStorage
 */
void free_storage(std::unique_ptr<CtcBeamSearchDecoderStorage> &storage);

/**
 * initialize the root
 */
void ctc_beam_search_decode_chunk_begin(PathTrie *root, Scorer *ext_scorer);

#endif  // CTC_BEAM_SEARCH_DECODER_H_
Support paddle 2.x (#538) * 2.x model * model test pass * fix data * fix soundfile with flac support * one thread dataloader test pass * export feasture size add trainer and utils add setup model and dataloader update travis using Bionic dist * add venv; test under venv * fix unittest; train and valid * add train and config * add config and train script * fix ctc cuda memcopy error * fix imports * fix train valid log * fix dataset batch shuffle shift start from 1 fix rank_zero_only decreator error close tensorboard when train over add decoding config and code * test process can run * test with decoding * test and infer with decoding * fix infer * fix ctc loss lr schedule sortagrad logger * aishell egs * refactor train add aishell egs * fix dataset batch shuffle and add batch sampler log print model parameter * fix model and ctc * sequence_mask make all inputs zeros, which cause grad be zero, this is a bug of LessThanOp add grad clip by global norm add model train test notebook * ctc loss remove run prefix using ord value as text id * using unk when training compute_loss need text ids ord id using in test mode, which compute wer/cer * fix tester * add lr_deacy refactor code * fix tools * fix ci add tune fix gru model bugs add dataset and model test * fix decoding * refactor repo fix decoding * fix musan and rir dataset * refactor io, loss, conv, rnn, gradclip, model, utils * fix ci and import * refactor model add export jit model * add deploy bin and test it * rm uselss egs * add layer tools * refactor socket server new model from pretrain * remve useless * fix instability loss and grad nan or inf for librispeech training * fix sampler * fix libri train.sh * fix doc * add license on cpp * fix doc * fix libri script * fix install * clip 5 wer 7.39, clip 400 wer 7.54, 1.8 clip 400 baseline 7.49 4 years ago			`// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.`
			`//`
[ctcdecoders]fix the licence (#1336) * fix the licence, test=doc * Update ctc_beam_search_decoder.cpp * Update ctc_beam_search_decoder.h * Update ctc_greedy_decoder.cpp * Update ctc_greedy_decoder.h * Update decoder_utils.cpp * Update decoder_utils.h * Update path_trie.cpp * Update path_trie.h * Update scorer.h * Update scorer.cpp 3 years ago			`// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");`
Support paddle 2.x (#538) * 2.x model * model test pass * fix data * fix soundfile with flac support * one thread dataloader test pass * export feasture size add trainer and utils add setup model and dataloader update travis using Bionic dist * add venv; test under venv * fix unittest; train and valid * add train and config * add config and train script * fix ctc cuda memcopy error * fix imports * fix train valid log * fix dataset batch shuffle shift start from 1 fix rank_zero_only decreator error close tensorboard when train over add decoding config and code * test process can run * test with decoding * test and infer with decoding * fix infer * fix ctc loss lr schedule sortagrad logger * aishell egs * refactor train add aishell egs * fix dataset batch shuffle and add batch sampler log print model parameter * fix model and ctc * sequence_mask make all inputs zeros, which cause grad be zero, this is a bug of LessThanOp add grad clip by global norm add model train test notebook * ctc loss remove run prefix using ord value as text id * using unk when training compute_loss need text ids ord id using in test mode, which compute wer/cer * fix tester * add lr_deacy refactor code * fix tools * fix ci add tune fix gru model bugs add dataset and model test * fix decoding * refactor repo fix decoding * fix musan and rir dataset * refactor io, loss, conv, rnn, gradclip, model, utils * fix ci and import * refactor model add export jit model * add deploy bin and test it * rm uselss egs * add layer tools * refactor socket server new model from pretrain * remve useless * fix instability loss and grad nan or inf for librispeech training * fix sampler * fix libri train.sh * fix doc * add license on cpp * fix doc * fix libri script * fix install * clip 5 wer 7.39, clip 400 wer 7.54, 1.8 clip 400 baseline 7.49 4 years ago			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

add initial files for deployment 7 years ago			`#ifndef CTC_BEAM_SEARCH_DECODER_H_`
			`#define CTC_BEAM_SEARCH_DECODER_H_`

			`#include <string>`
			`#include <utility>`
format C++ source code 7 years ago			`#include <vector>`
format header includes & update setup info 7 years ago
add initial files for deployment 7 years ago			`#include "scorer.h"`

add the support of parallel beam search decoding in deployment 7 years ago			`/* CTC Beam Search Decoder`
code cleanup for the deployment decoder 7 years ago
			`* Parameters:`
			`* probs_seq: 2-D vector that each element is a vector of probabilities`
			`* over vocabulary of one time step.`
			`* vocabulary: A vector of vocabulary.`
format varabiables' name & add more comments 7 years ago			`* beam_size: The width of beam search.`
clean up code & update README for decoder in deployment 7 years ago			`* cutoff_prob: Cutoff probability for pruning.`
			`* cutoff_top_n: Cutoff number for pruning.`
refine by following review comments 7 years ago			`* ext_scorer: External scorer to evaluate a prefix, which consists of`
			`* n-gram language model scoring and word insertion term.`
			`* Default null, decoding the input sample without scorer.`
code cleanup for the deployment decoder 7 years ago			`* Return:`
			`* A vector that each element is a pair of score and decoding result,`
			`* in desending order.`
			`*/`
[ASR] Support CTC decoder online (#821) * fix the destructer problem for prefixes * unified offline and online in ctcdecoders, test=asr * rename swig_decoders to paddlespeech_ctcdecoders, test=asr * add reset_stage for ctcdecoder * fix some problems * fix ctconline * fix a bug * fix the format * fix 1xt2x 3 years ago			`std::vector<std::pair<double, std::string>> ctc_beam_search_decoding(`
append some comments 7 years ago			`const std::vector<std::vector<double>> &probs_seq,`
format varabiables' name & add more comments 7 years ago			`const std::vector<std::string> &vocabulary,`
adjust scorer's init & add logging for scorer & separate long functions 7 years ago			`size_t beam_size,`
			`double cutoff_prob = 1.0,`
			`size_t cutoff_top_n = 40,`
add blank_id parameter 3 years ago			`Scorer *ext_scorer = nullptr,`
			`size_t blank_id = 0);`
refine the interface of decoders in swig 7 years ago
[ASR] Support CTC decoder online (#821) * fix the destructer problem for prefixes * unified offline and online in ctcdecoders, test=asr * rename swig_decoders to paddlespeech_ctcdecoders, test=asr * add reset_stage for ctcdecoder * fix some problems * fix ctconline * fix a bug * fix the format * fix 1xt2x 3 years ago
format header includes & update setup info 7 years ago			`/* CTC Beam Search Decoder for batch data`
add the support of parallel beam search decoding in deployment 7 years ago
refine the interface of decoders in swig 7 years ago			`* Parameters:`
add the support of parallel beam search decoding in deployment 7 years ago			`* probs_seq: 3-D vector that each element is a 2-D vector that can be used`
			`* by ctc_beam_search_decoder().`
refine the interface of decoders in swig 7 years ago			`* vocabulary: A vector of vocabulary.`
format varabiables' name & add more comments 7 years ago			`* beam_size: The width of beam search.`
add the support of parallel beam search decoding in deployment 7 years ago			`* num_processes: Number of threads for beam search.`
clean up code & update README for decoder in deployment 7 years ago			`* cutoff_prob: Cutoff probability for pruning.`
			`* cutoff_top_n: Cutoff number for pruning.`
refine by following review comments 7 years ago			`* ext_scorer: External scorer to evaluate a prefix, which consists of`
			`* n-gram language model scoring and word insertion term.`
			`* Default null, decoding the input sample without scorer.`
refine the interface of decoders in swig 7 years ago			`* Return:`
append some comments 7 years ago			`* A 2-D vector that each element is a vector of beam search decoding`
			`* result for one audio sample.`
add the support of parallel beam search decoding in deployment 7 years ago			`*/`
			`std::vector<std::vector<std::pair<double, std::string>>>`
[ASR] Support CTC decoder online (#821) * fix the destructer problem for prefixes * unified offline and online in ctcdecoders, test=asr * rename swig_decoders to paddlespeech_ctcdecoders, test=asr * add reset_stage for ctcdecoder * fix some problems * fix ctconline * fix a bug * fix the format * fix 1xt2x 3 years ago			`ctc_beam_search_decoding_batch(`
append some comments 7 years ago			`const std::vector<std::vector<std::vector<double>>> &probs_split,`
			`const std::vector<std::string> &vocabulary,`
format varabiables' name & add more comments 7 years ago			`size_t beam_size,`
adjust scorer's init & add logging for scorer & separate long functions 7 years ago			`size_t num_processes,`
format C++ source code 7 years ago			`double cutoff_prob = 1.0,`
adjust scorer's init & add logging for scorer & separate long functions 7 years ago			`size_t cutoff_top_n = 40,`
add blank_id parameter 3 years ago			`Scorer *ext_scorer = nullptr,`
			`size_t blank_id = 0);`
add initial files for deployment 7 years ago
[ASR] Support CTC decoder online (#821) * fix the destructer problem for prefixes * unified offline and online in ctcdecoders, test=asr * rename swig_decoders to paddlespeech_ctcdecoders, test=asr * add reset_stage for ctcdecoder * fix some problems * fix ctconline * fix a bug * fix the format * fix 1xt2x 3 years ago			`/**`
			`* Store the root and prefixes for decoder`
			`*/`

			`class CtcBeamSearchDecoderStorage {`
			`public:`
			`PathTrie *root = nullptr;`
			`std::vector<PathTrie *> prefixes;`

			`CtcBeamSearchDecoderStorage() {`
			`// init prefixes' root`
			`this->root = new PathTrie();`
			`this->root->log_prob_b_prev = 0.0;`
			`// The score of root is in log scale.Since the prob=1.0, the prob score`
			`// in log scale is 0.0`
			`this->root->score = root->log_prob_b_prev;`
			`// std::vector<PathTrie *> prefixes;`
			`this->prefixes.push_back(root);`
			`};`

			`~CtcBeamSearchDecoderStorage() {`
			`if (root != nullptr) {`
			`delete root;`
			`root = nullptr;`
			`}`
			`};`
			`};`

			`/**`
			`* The ctc beam search decoder, support batchsize >= 1`
			`*/`
			`class CtcBeamSearchDecoderBatch {`
			`public:`
			`CtcBeamSearchDecoderBatch(const std::vector<std::string> &vocabulary,`
			`size_t batch_size,`
			`size_t beam_size,`
			`size_t num_processes,`
			`double cutoff_prob,`
			`size_t cutoff_top_n,`
			`Scorer *ext_scorer,`
			`size_t blank_id);`

			`~CtcBeamSearchDecoderBatch();`
			`void next(const std::vector<std::vector<std::vector<double>>> &probs_split,`
			`const std::vector<std::string> &has_value);`

			`std::vector<std::vector<std::pair<double, std::string>>> decode();`

			`void reset_state(size_t batch_size,`
			`size_t beam_size,`
			`size_t num_processes,`
			`double cutoff_prob,`
			`size_t cutoff_top_n);`

			`private:`
			`std::vector<std::string> vocabulary;`
			`size_t batch_size;`
			`size_t beam_size;`
			`size_t num_processes;`
			`double cutoff_prob;`
			`size_t cutoff_top_n;`
			`Scorer *ext_scorer;`
			`size_t blank_id;`
			`std::vector<std::unique_ptr<CtcBeamSearchDecoderStorage>>`
			`decoder_storage_vector;`
			`};`

			`/**`
			`* function for chunk decoding`
			`*/`
			`void ctc_beam_search_decode_chunk(`
			`PathTrie *root,`
			`std::vector<PathTrie *> &prefixes,`
			`const std::vector<std::vector<double>> &probs_seq,`
			`const std::vector<std::string> &vocabulary,`
			`size_t beam_size,`
			`double cutoff_prob,`
			`size_t cutoff_top_n,`
			`Scorer *ext_scorer,`
			`size_t blank_id);`

			`std::vector<std::pair<double, std::string>> get_decode_result(`
			`std::vector<PathTrie *> &prefixes,`
			`const std::vector<std::string> &vocabulary,`
			`size_t beam_size,`
			`Scorer *ext_scorer);`

			`/**`
			`* free the CtcBeamSearchDecoderStorage`
			`*/`
			`void free_storage(std::unique_ptr<CtcBeamSearchDecoderStorage> &storage);`

			`/**`
			`* initialize the root`
			`*/`
			`void ctc_beam_search_decode_chunk_begin(PathTrie root, Scorer ext_scorer);`

format C++ source code 7 years ago			`#endif // CTC_BEAM_SEARCH_DECODER_H_`