You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
117 lines
5.0 KiB
117 lines
5.0 KiB
3 years ago
|
// fstext/determinize-star.h
|
||
|
|
||
|
// Copyright 2009-2011 Microsoft Corporation
|
||
|
// 2014 Guoguo Chen
|
||
|
// 2015 Hainan Xu
|
||
|
|
||
|
// See ../../COPYING for clarification regarding multiple authors
|
||
|
//
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
//
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
//
|
||
|
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||
|
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||
|
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||
|
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||
|
// See the Apache 2 License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
|
||
|
#ifndef KALDI_FSTEXT_DETERMINIZE_STAR_H_
|
||
|
#define KALDI_FSTEXT_DETERMINIZE_STAR_H_
|
||
|
#include <fst/fst-decl.h>
|
||
|
#include <fst/fstlib.h>
|
||
|
#include <algorithm>
|
||
|
#include <map>
|
||
|
#include <set>
|
||
|
#include <stdexcept> // this algorithm uses exceptions
|
||
|
#include <vector>
|
||
|
|
||
|
namespace fst {
|
||
|
|
||
|
/// \addtogroup fst_extensions
|
||
|
/// @{
|
||
|
|
||
|
// For an example of usage, see test-determinize-star.cc
|
||
|
|
||
|
/*
|
||
|
DeterminizeStar implements determinization with epsilon removal, which we
|
||
|
distinguish with a star.
|
||
|
|
||
|
We define a determinized* FST as one in which no state has more than one
|
||
|
transition with the same input-label. Epsilon input labels are not allowed
|
||
|
except starting from states that have exactly one arc exiting them (and are
|
||
|
not final). [In the normal definition of determinized, epsilon-input labels
|
||
|
are not allowed at all, whereas in Mohri's definition, epsilons are treated
|
||
|
as ordinary symbols]. The determinized* definition is intended to simulate
|
||
|
the effect of allowing strings of output symbols at each state.
|
||
|
|
||
|
The algorithm implemented here takes an Fst<Arc>, and a pointer to a
|
||
|
MutableFst<Arc> where it puts its output. The weight type is assumed to be a
|
||
|
float-weight. It does epsilon removal and determinization.
|
||
|
This algorithm may fail if the input has epsilon cycles under
|
||
|
certain circumstances (i.e. the semiring is non-idempotent, e.g. the log
|
||
|
semiring, or there are negative cost epsilon cycles).
|
||
|
|
||
|
This implementation is much less fancy than the one in fst/determinize.h, and
|
||
|
does not have an "on-demand" version.
|
||
|
|
||
|
The algorithm is a fairly normal determinization algorithm. We keep in
|
||
|
memory the subsets of states, together with their leftover strings and their
|
||
|
weights. The only difference is we detect input epsilon transitions and
|
||
|
treat them "specially".
|
||
|
*/
|
||
|
|
||
|
// This algorithm will be slightly faster if you sort the input fst on input
|
||
|
// label.
|
||
|
|
||
|
/**
|
||
|
This function implements the normal version of DeterminizeStar, in which the
|
||
|
output strings are represented using sequences of arcs, where all but the
|
||
|
first one has an epsilon on the input side. The debug_ptr argument is an
|
||
|
optional pointer to a bool that, if it becomes true while the algorithm is
|
||
|
executing, causes the algorithm to print a traceback and terminate (used in
|
||
|
fstdeterminizestar.cc to debug non-terminating determinization).
|
||
|
If max_states is positive, it will stop determinization and throw an
|
||
|
exception as soon as the max-states is reached. This can be useful in tests.
|
||
|
If allow_partial is true, the algorithm will output partial results when the
|
||
|
specified max_states is reached (when larger than zero), instead of throwing
|
||
|
an error.
|
||
|
|
||
|
Caution, the return status is un-intuitive: this function will return false
|
||
|
if determinization completed normally, and true if it was stopped early by
|
||
|
reaching the 'max-states' limit, and a partial FST was generated.
|
||
|
*/
|
||
|
// Declaration of the arc-sequence overload of DeterminizeStar: takes the input
// FST `ifst` and writes the determinized* result to `*ofst`, whose arc type is
// F::Arc. Only the declaration lives here; the definition is presumably in
// fstext/determinize-star-inl.h, which this header includes at its end.
// Defaults visible below: delta = kDelta, debug_ptr = NULL, max_states = -1
// (the limit is described as applying only when max_states is positive),
// allow_partial = false.
// Returns false when determinization completed normally and true when it was
// stopped early at the max_states limit and a partial FST was generated.
template <class F>
|
||
|
bool DeterminizeStar(F &ifst, MutableFst<typename F::Arc> *ofst, // NOLINT
|
||
|
float delta = kDelta, bool *debug_ptr = NULL,
|
||
|
int max_states = -1, bool allow_partial = false);
|
||
|
|
||
|
/* This is a version of DeterminizeStar with a slightly more "natural" output
|
||
|
format, where the output sequences are encoded using the GallicArc (i.e. the
|
||
|
output symbols are strings). If max_states is positive, it will stop
|
||
|
determinization and throw an exception as soon as the max-states is reached.
|
||
|
This can be useful in tests. If allow_partial is true, the algorithm will
|
||
|
output partial results when the specified max_states is reached (when larger
|
||
|
than zero), instead of throwing an error.
|
||
|
|
||
|
Caution, the return status is un-intuitive: this function will return false
|
||
|
if determinization completed normally, and true if it was stopped early by
|
||
|
reaching the 'max-states' limit, and a partial FST was generated.
|
||
|
*/
|
||
|
// Declaration of the Gallic overload of DeterminizeStar: same parameters as
// the arc-sequence overload, but the result is written to an FST over
// GallicArc<F::Arc>. Only the declaration lives here; the definition is
// presumably in fstext/determinize-star-inl.h, which this header includes at
// its end.
// Defaults visible below: delta = kDelta, debug_ptr = NULL, max_states = -1
// (the limit is described as applying only when max_states is positive),
// allow_partial = false.
// Returns false when determinization completed normally and true when it was
// stopped early at the max_states limit and a partial FST was generated.
template <class F>
|
||
|
bool DeterminizeStar(F &ifst, // NOLINT
|
||
|
MutableFst<GallicArc<typename F::Arc> > *ofst,
|
||
|
float delta = kDelta, bool *debug_ptr = NULL,
|
||
|
int max_states = -1, bool allow_partial = false);
|
||
|
|
||
|
/// @} end "addtogroup fst_extensions"
|
||
|
|
||
|
} // end namespace fst
|
||
|
|
||
|
#include "fstext/determinize-star-inl.h"
|
||
|
|
||
|
#endif // KALDI_FSTEXT_DETERMINIZE_STAR_H_
|