You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
PaddleSpeech/runtime/engine/kaldi/fstext/kaldi-fst-io-inl.h

209 lines
6.6 KiB

3 years ago
// fstext/kaldi-fst-io-inl.h
// Copyright 2009-2011 Microsoft Corporation
// 2012-2015 Johns Hopkins University (Author: Daniel Povey)
// 2013 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FSTEXT_KALDI_FST_IO_INL_H_
#define KALDI_FSTEXT_KALDI_FST_IO_INL_H_
#include <string>
#include <vector>
#include "util/text-utils.h"
namespace fst {
template <class Arc>
void WriteFstKaldi(std::ostream &os, bool binary, const VectorFst<Arc> &t) {
bool ok;
if (binary) {
// Binary-mode writing.
ok = t.Write(os, FstWriteOptions());
} else {
// Text-mode output. Note: we expect that t.InputSymbols() and
// t.OutputSymbols() would always return NULL. The corresponding input
// routine would not work if the FST actually had symbols attached. Write a
// newline to start the FST; in a table, the first line of the FST will
// appear on its own line.
os << '\n';
bool acceptor = false, write_one = false;
FstPrinter<Arc> printer(t, t.InputSymbols(), t.OutputSymbols(), NULL,
acceptor, write_one, "\t");
printer.Print(&os, "<unknown>");
if (os.fail()) KALDI_ERR << "Stream failure detected writing FST to stream";
// Write another newline as a terminating character. The read routine will
// detect this [this is a Kaldi mechanism, not something in the original
// OpenFst code].
os << '\n';
ok = os.good();
}
if (!ok) {
KALDI_ERR << "Error writing FST to stream";
}
}
// Helper for ReadFstKaldi: parses the string `s` into the weight `*w` using
// the weight type's stream extraction operator.
// Returns false if extraction fails, or if the parsed weight equals W::Zero()
// while `allow_zero` is false; returns true otherwise.
template <class W>
inline bool StrToWeight(const std::string &s, bool allow_zero, W *w) {
  std::istringstream parser(s);
  parser >> *w;
  if (parser.fail()) {
    return false;
  }
  if (allow_zero) {
    return true;
  }
  // A zero weight is only acceptable when the caller asked for it.
  return !(*w == W::Zero());
}
// Reads an FST from `is` into `*fst`.
//
//   binary == true : reads with VectorFst<Arc>::Read() and copies the result
//                    into `*fst`.
//   binary == false: reads the text form written by WriteFstKaldi: a leading
//                    newline, then one line per final state or arc, ended by
//                    an empty line (or end of stream). Fields are split on
//                    FLAGS_fst_field_separator plus "\r\n". Accepted layouts:
//                      1 column : <state>                     final, weight One
//                      2 columns: <state> <weight>            final state
//                      4 columns: <src> <dst> <ilabel> <olabel>   arc, weight One
//                      5 columns: <src> <dst> <ilabel> <olabel> <weight>
//                    3 columns (acceptor format) and more than 5 columns are
//                    rejected. The state on the first line becomes the start
//                    state.
//
// Malformed input is reported through KALDI_ERR. State ids must be
// non-negative: a negative id would bypass the AddState() loops below and be
// handed to SetStart/SetFinal/AddArc for a state that was never created.
template <class Arc>
void ReadFstKaldi(std::istream &is, bool binary, VectorFst<Arc> *fst) {
  typedef typename Arc::Weight Weight;
  typedef typename Arc::StateId StateId;
  if (binary) {
    // We don't have access to the filename here, so write [unknown].
    VectorFst<Arc> *ans =
        VectorFst<Arc>::Read(is, fst::FstReadOptions(std::string("[unknown]")));
    if (ans == NULL) {
      KALDI_ERR << "Error reading FST from stream.";
    }
    *fst = *ans;  // shallow copy.
    delete ans;
  } else {
    // Consume the \r on Windows, the \n that the text-form FST format starts
    // with, and any extra spaces that might have got in there somehow.
    while (std::isspace(is.peek()) && is.peek() != '\n') is.get();
    if (is.peek() == '\n') {
      is.get();  // consume the newline.
    } else {     // saw spaces but no newline.. this is not expected.
      KALDI_ERR << "Reading FST: unexpected sequence of spaces "
                << " at file position " << is.tellg();
    }
    using kaldi::ConvertStringToInteger;
    using kaldi::SplitStringToIntegers;
    using std::string;
    using std::vector;
    fst->DeleteStates();
    string line;
    size_t nline = 0;
    string separator = FLAGS_fst_field_separator + "\r\n";
    while (std::getline(is, line)) {
      nline++;
      vector<string> col;
      // on Windows we'll write in text and read in binary mode.
      kaldi::SplitStringToVector(line, separator.c_str(), true, &col);
      if (col.size() == 0) break;  // Empty line is a signal to stop, in our
                                   // archive format.
      if (col.size() > 5) {
        KALDI_ERR << "Bad line in FST: " << line;
      }
      StateId s;
      // Reject unparsable and negative source states up front; everything
      // below assumes `s` indexes a state that exists.
      if (!ConvertStringToInteger(col[0], &s) || s < 0) {
        KALDI_ERR << "Bad line in FST: " << line;
      }
      while (s >= fst->NumStates()) fst->AddState();
      if (nline == 1) fst->SetStart(s);
      bool ok = true;
      Arc arc;
      Weight w;
      StateId d = s;  // destination state; stays equal to s for final-state lines.
      switch (col.size()) {
        case 1:
          fst->SetFinal(s, Weight::One());
          break;
        case 2:
          if (!StrToWeight(col[1], true, &w))
            ok = false;
          else
            fst->SetFinal(s, w);
          break;
        case 3:  // 3 columns not ok for Lattice format; it's not an acceptor.
          ok = false;
          break;
        case 4:
          ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
               arc.nextstate >= 0 &&
               ConvertStringToInteger(col[2], &arc.ilabel) &&
               ConvertStringToInteger(col[3], &arc.olabel);
          if (ok) {
            d = arc.nextstate;
            arc.weight = Weight::One();
            fst->AddArc(s, arc);
          }
          break;
        case 5:
          // Arc weights may not be Zero (allow_zero == false).
          ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
               arc.nextstate >= 0 &&
               ConvertStringToInteger(col[2], &arc.ilabel) &&
               ConvertStringToInteger(col[3], &arc.olabel) &&
               StrToWeight(col[4], false, &arc.weight);
          if (ok) {
            d = arc.nextstate;
            fst->AddArc(s, arc);
          }
          break;
        default:
          ok = false;
      }
      // Make sure the destination state exists as well.
      while (d >= fst->NumStates()) fst->AddState();
      if (!ok) KALDI_ERR << "Bad line in FST: " << line;
    }
  }
}
// Writes `t` to `os` via WriteFstKaldi. Any exception raised while writing is
// caught and turned into a `false` return; returns true otherwise.
template <class Arc>  // static
bool VectorFstTplHolder<Arc>::Write(std::ostream &os, bool binary, const T &t) {
  bool succeeded = true;
  try {
    WriteFstKaldi(os, binary, t);
  } catch (...) {
    succeeded = false;
  }
  return succeeded;
}
// Reads one FST from `is` into a newly allocated VectorFst stored in t_,
// choosing text or binary mode from the first character of the stream.
// Returns false (after a warning) at end of stream, and false (after Clear())
// if ReadFstKaldi throws; returns true on success.
template <class Arc>
bool VectorFstTplHolder<Arc>::Read(std::istream &is) {
  Clear();
  const int first_char = is.peek();
  if (first_char == -1) {
    KALDI_WARN << "End of stream detected reading Fst";
    return false;
  }
  // The text form of the FST begins with whitespace (normally '\n'); the
  // binary form cannot, because it starts with the FST Type(), which is not
  // whitespace. So a leading space means text, anything else means binary.
  const bool is_binary = !isspace(first_char);
  try {
    t_ = new VectorFst<Arc>();
    ReadFstKaldi(is, is_binary, t_);
  } catch (...) {
    Clear();
    return false;
  }
  return true;
}
} // namespace fst.
#endif // KALDI_FSTEXT_KALDI_FST_IO_INL_H_