You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
99 lines
3.9 KiB
99 lines
3.9 KiB
3 years ago
|
// fstext/pre-determinize.h
|
||
|
|
||
|
// Copyright 2009-2011 Microsoft Corporation
|
||
|
|
||
|
// See ../../COPYING for clarification regarding multiple authors
|
||
|
//
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
//
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
//
|
||
|
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||
|
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||
|
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||
|
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||
|
// See the Apache 2 License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
|
||
|
#ifndef KALDI_FSTEXT_PRE_DETERMINIZE_H_
|
||
|
#define KALDI_FSTEXT_PRE_DETERMINIZE_H_
|
||
|
|
||
|
#include <fst/fst-decl.h>
|
||
|
#include <fst/fstlib.h>
|
||
|
|
||
|
#include <algorithm>
|
||
|
#include <map>
|
||
|
#include <set>
|
||
|
#include <string>
|
||
|
#include <vector>
|
||
|
|
||
|
#include "base/kaldi-common.h"
|
||
|
|
||
|
namespace fst {
|
||
|
|
||
|
/* PreDeterminize inserts extra symbols on the input side of an FST as necessary
   to ensure that, after epsilon removal, it will be compactly determinizable by
   the determinize* algorithm.  (By "compactly determinizable" we mean that no
   original FST state is represented in more than one determinized state.)

   Caution: this code is now only used in testing.

   @param fst               The FST into which the extra input symbols are
                            inserted.
   @param first_new_symbol  The value from which the new symbols start; it
                            should be higher than the largest-numbered symbol
                            currently in the FST.
   @param syms_out          Array that the newly added symbols are put into; it
                            should be empty at the start.

   This is a declaration only.  NOTE(review): the definition is presumably in
   "fstext/pre-determinize-inl.h", which this header includes -- confirm.
*/
|
||
|
|
||
|
template <class Arc, class Int>
|
||
|
void PreDeterminize(MutableFst<Arc> *fst, typename Arc::Label first_new_symbol,
|
||
|
std::vector<Int> *syms_out);
|
||
|
|
||
|
/* CreateNewSymbols is a helper function used inside PreDeterminize, and is also
   useful when you need to add a number of extra symbols to a different
   vocabulary from the one modified by PreDeterminize.

   NOTE(review): the parameter semantics are not visible from this declaration.
   Presumably nSym new symbols, named using 'prefix', are added to
   'inputSymTable' and their labels are placed in 'syms_out' -- confirm against
   the definition before relying on this.
*/
|
||
|
|
||
|
template <class Label>
|
||
|
void CreateNewSymbols(SymbolTable *inputSymTable, int nSym, std::string prefix,
|
||
|
std::vector<Label> *syms_out);
|
||
|
|
||
|
/** AddSelfLoops is a function you will probably want to use alongside
    PreDeterminize, to add self-loops to any FSTs that you compose on the left
    hand side of the one modified by PreDeterminize.

    This function inserts loops with "special symbols" [e.g. \#0, \#1] into an
    FST.  This is done at each final state and each state with non-epsilon
    output symbols on at least one arc out of it.  This is to ensure that these
    symbols, when inserted into the input side of an FST we will compose with on
    the right, can "pass through" this FST.

    At input, isyms and osyms must be vectors of the same size n, corresponding
    to symbols that currently do not exist in 'fst'.  For each state in 'fst'
    that has non-epsilon symbols on the output side of arcs leaving it, or which
    is a final state, this function inserts n self-loops with unit weight and
    one of the n pairs of symbols on its input and output.

    @param fst    The FST to which the self-loops are added.
    @param isyms  The n symbols used on the input side of the self-loops.
    @param osyms  The n symbols used on the output side of the self-loops; must
                  have the same size as isyms.
*/
|
||
|
template <class Arc>
|
||
|
void AddSelfLoops(MutableFst<Arc> *fst,
|
||
|
const std::vector<typename Arc::Label> &isyms,
|
||
|
const std::vector<typename Arc::Label> &osyms);
|
||
|
|
||
|
/* DeleteISymbols replaces any instances of symbols in the vector symsIn,
   appearing on the input side, with epsilon.  Note that symsIn is taken by
   value, so the caller's vector is not modified. */
|
||
|
/* It returns the number of instances of symbols deleted. */
|
||
|
template <class Arc>
|
||
|
int64 DeleteISymbols(MutableFst<Arc> *fst,
|
||
|
std::vector<typename Arc::Label> symsIn);
|
||
|
|
||
|
/* CreateSuperFinal takes an FST, and creates an equivalent FST with a single
   final state with no transitions out and unit final weight, by inserting
   epsilon transitions as necessary.

   NOTE(review): the returned StateId is presumably the id of that single final
   state -- confirm against the definition. */
|
||
|
template <class Arc>
|
||
|
typename Arc::StateId CreateSuperFinal(MutableFst<Arc> *fst);
|
||
|
|
||
|
} // end namespace fst
|
||
|
|
||
|
#include "fstext/pre-determinize-inl.h"
|
||
|
|
||
|
#endif // KALDI_FSTEXT_PRE_DETERMINIZE_H_
|