You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
328 lines
11 KiB
328 lines
11 KiB
3 years ago
|
// base/io-funcs-inl.h
|
||
|
|
||
|
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
|
||
|
// Jan Silovsky; Yanmin Qian;
|
||
|
// Johns Hopkins University (Author: Daniel Povey)
|
||
|
// 2016 Xiaohui Zhang
|
||
|
|
||
|
// See ../../COPYING for clarification regarding multiple authors
|
||
|
//
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
||
|
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||
|
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||
|
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||
|
// MERCHANTABILITY OR NON-INFRINGEMENT.
|
||
|
// See the Apache 2 License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
|
||
|
#ifndef KALDI_BASE_IO_FUNCS_INL_H_
|
||
|
#define KALDI_BASE_IO_FUNCS_INL_H_ 1
|
||
|
|
||
|
// Do not include this file directly. It is included by base/io-funcs.h
|
||
|
|
||
|
#include <limits>
|
||
|
#include <vector>
|
||
|
|
||
|
namespace kaldi {
|
||
|
|
||
|
// Template that covers integers.
|
||
|
template<class T> void WriteBasicType(std::ostream &os,
|
||
|
bool binary, T t) {
|
||
|
// Compile time assertion that this is not called with a wrong type.
|
||
|
KALDI_ASSERT_IS_INTEGER_TYPE(T);
|
||
|
if (binary) {
|
||
|
char len_c = (std::numeric_limits<T>::is_signed ? 1 : -1)
|
||
|
* static_cast<char>(sizeof(t));
|
||
|
os.put(len_c);
|
||
|
os.write(reinterpret_cast<const char *>(&t), sizeof(t));
|
||
|
} else {
|
||
|
if (sizeof(t) == 1)
|
||
|
os << static_cast<int16>(t) << " ";
|
||
|
else
|
||
|
os << t << " ";
|
||
|
}
|
||
|
if (os.fail()) {
|
||
|
KALDI_ERR << "Write failure in WriteBasicType.";
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Template that covers integers.
|
||
|
template<class T> inline void ReadBasicType(std::istream &is,
|
||
|
bool binary, T *t) {
|
||
|
KALDI_PARANOID_ASSERT(t != NULL);
|
||
|
// Compile time assertion that this is not called with a wrong type.
|
||
|
KALDI_ASSERT_IS_INTEGER_TYPE(T);
|
||
|
if (binary) {
|
||
|
int len_c_in = is.get();
|
||
|
if (len_c_in == -1)
|
||
|
KALDI_ERR << "ReadBasicType: encountered end of stream.";
|
||
|
char len_c = static_cast<char>(len_c_in), len_c_expected
|
||
|
= (std::numeric_limits<T>::is_signed ? 1 : -1)
|
||
|
* static_cast<char>(sizeof(*t));
|
||
|
if (len_c != len_c_expected) {
|
||
|
KALDI_ERR << "ReadBasicType: did not get expected integer type, "
|
||
|
<< static_cast<int>(len_c)
|
||
|
<< " vs. " << static_cast<int>(len_c_expected)
|
||
|
<< ". You can change this code to successfully"
|
||
|
<< " read it later, if needed.";
|
||
|
// insert code here to read "wrong" type. Might have a switch statement.
|
||
|
}
|
||
|
is.read(reinterpret_cast<char *>(t), sizeof(*t));
|
||
|
} else {
|
||
|
if (sizeof(*t) == 1) {
|
||
|
int16 i;
|
||
|
is >> i;
|
||
|
*t = i;
|
||
|
} else {
|
||
|
is >> *t;
|
||
|
}
|
||
|
}
|
||
|
if (is.fail()) {
|
||
|
KALDI_ERR << "Read failure in ReadBasicType, file position is "
|
||
|
<< is.tellg() << ", next char is " << is.peek();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Template that covers integers.
|
||
|
template<class T>
|
||
|
inline void WriteIntegerPairVector(std::ostream &os, bool binary,
|
||
|
const std::vector<std::pair<T, T> > &v) {
|
||
|
// Compile time assertion that this is not called with a wrong type.
|
||
|
KALDI_ASSERT_IS_INTEGER_TYPE(T);
|
||
|
if (binary) {
|
||
|
char sz = sizeof(T); // this is currently just a check.
|
||
|
os.write(&sz, 1);
|
||
|
int32 vecsz = static_cast<int32>(v.size());
|
||
|
KALDI_ASSERT((size_t)vecsz == v.size());
|
||
|
os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
|
||
|
if (vecsz != 0) {
|
||
|
os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T) * vecsz * 2);
|
||
|
}
|
||
|
} else {
|
||
|
// focus here is on prettiness of text form rather than
|
||
|
// efficiency of reading-in.
|
||
|
// reading-in is dominated by low-level operations anyway:
|
||
|
// for efficiency use binary.
|
||
|
os << "[ ";
|
||
|
typename std::vector<std::pair<T, T> >::const_iterator iter = v.begin(),
|
||
|
end = v.end();
|
||
|
for (; iter != end; ++iter) {
|
||
|
if (sizeof(T) == 1)
|
||
|
os << static_cast<int16>(iter->first) << ','
|
||
|
<< static_cast<int16>(iter->second) << ' ';
|
||
|
else
|
||
|
os << iter->first << ','
|
||
|
<< iter->second << ' ';
|
||
|
}
|
||
|
os << "]\n";
|
||
|
}
|
||
|
if (os.fail()) {
|
||
|
KALDI_ERR << "Write failure in WriteIntegerPairVector.";
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Template that covers integers.
|
||
|
template<class T>
|
||
|
inline void ReadIntegerPairVector(std::istream &is, bool binary,
|
||
|
std::vector<std::pair<T, T> > *v) {
|
||
|
KALDI_ASSERT_IS_INTEGER_TYPE(T);
|
||
|
KALDI_ASSERT(v != NULL);
|
||
|
if (binary) {
|
||
|
int sz = is.peek();
|
||
|
if (sz == sizeof(T)) {
|
||
|
is.get();
|
||
|
} else { // this is currently just a check.
|
||
|
KALDI_ERR << "ReadIntegerPairVector: expected to see type of size "
|
||
|
<< sizeof(T) << ", saw instead " << sz << ", at file position "
|
||
|
<< is.tellg();
|
||
|
}
|
||
|
int32 vecsz;
|
||
|
is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
|
||
|
if (is.fail() || vecsz < 0) goto bad;
|
||
|
v->resize(vecsz);
|
||
|
if (vecsz > 0) {
|
||
|
is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz*2);
|
||
|
}
|
||
|
} else {
|
||
|
std::vector<std::pair<T, T> > tmp_v; // use temporary so v doesn't use extra memory
|
||
|
// due to resizing.
|
||
|
is >> std::ws;
|
||
|
if (is.peek() != static_cast<int>('[')) {
|
||
|
KALDI_ERR << "ReadIntegerPairVector: expected to see [, saw "
|
||
|
<< is.peek() << ", at file position " << is.tellg();
|
||
|
}
|
||
|
is.get(); // consume the '['.
|
||
|
is >> std::ws; // consume whitespace.
|
||
|
while (is.peek() != static_cast<int>(']')) {
|
||
|
if (sizeof(T) == 1) { // read/write chars as numbers.
|
||
|
int16 next_t1, next_t2;
|
||
|
is >> next_t1;
|
||
|
if (is.fail()) goto bad;
|
||
|
if (is.peek() != static_cast<int>(','))
|
||
|
KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw "
|
||
|
<< is.peek() << ", at file position " << is.tellg();
|
||
|
is.get(); // consume the ','.
|
||
|
is >> next_t2 >> std::ws;
|
||
|
if (is.fail()) goto bad;
|
||
|
else
|
||
|
tmp_v.push_back(std::make_pair<T, T>((T)next_t1, (T)next_t2));
|
||
|
} else {
|
||
|
T next_t1, next_t2;
|
||
|
is >> next_t1;
|
||
|
if (is.fail()) goto bad;
|
||
|
if (is.peek() != static_cast<int>(','))
|
||
|
KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw "
|
||
|
<< is.peek() << ", at file position " << is.tellg();
|
||
|
is.get(); // consume the ','.
|
||
|
is >> next_t2 >> std::ws;
|
||
|
if (is.fail()) goto bad;
|
||
|
else
|
||
|
tmp_v.push_back(std::pair<T, T>(next_t1, next_t2));
|
||
|
}
|
||
|
}
|
||
|
is.get(); // get the final ']'.
|
||
|
*v = tmp_v; // could use std::swap to use less temporary memory, but this
|
||
|
// uses less permanent memory.
|
||
|
}
|
||
|
if (!is.fail()) return;
|
||
|
bad:
|
||
|
KALDI_ERR << "ReadIntegerPairVector: read failure at file position "
|
||
|
<< is.tellg();
|
||
|
}
|
||
|
|
||
|
template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
|
||
|
const std::vector<T> &v) {
|
||
|
// Compile time assertion that this is not called with a wrong type.
|
||
|
KALDI_ASSERT_IS_INTEGER_TYPE(T);
|
||
|
if (binary) {
|
||
|
char sz = sizeof(T); // this is currently just a check.
|
||
|
os.write(&sz, 1);
|
||
|
int32 vecsz = static_cast<int32>(v.size());
|
||
|
KALDI_ASSERT((size_t)vecsz == v.size());
|
||
|
os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
|
||
|
if (vecsz != 0) {
|
||
|
os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T)*vecsz);
|
||
|
}
|
||
|
} else {
|
||
|
// focus here is on prettiness of text form rather than
|
||
|
// efficiency of reading-in.
|
||
|
// reading-in is dominated by low-level operations anyway:
|
||
|
// for efficiency use binary.
|
||
|
os << "[ ";
|
||
|
typename std::vector<T>::const_iterator iter = v.begin(), end = v.end();
|
||
|
for (; iter != end; ++iter) {
|
||
|
if (sizeof(T) == 1)
|
||
|
os << static_cast<int16>(*iter) << " ";
|
||
|
else
|
||
|
os << *iter << " ";
|
||
|
}
|
||
|
os << "]\n";
|
||
|
}
|
||
|
if (os.fail()) {
|
||
|
KALDI_ERR << "Write failure in WriteIntegerVector.";
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
template<class T> inline void ReadIntegerVector(std::istream &is,
|
||
|
bool binary,
|
||
|
std::vector<T> *v) {
|
||
|
KALDI_ASSERT_IS_INTEGER_TYPE(T);
|
||
|
KALDI_ASSERT(v != NULL);
|
||
|
if (binary) {
|
||
|
int sz = is.peek();
|
||
|
if (sz == sizeof(T)) {
|
||
|
is.get();
|
||
|
} else { // this is currently just a check.
|
||
|
KALDI_ERR << "ReadIntegerVector: expected to see type of size "
|
||
|
<< sizeof(T) << ", saw instead " << sz << ", at file position "
|
||
|
<< is.tellg();
|
||
|
}
|
||
|
int32 vecsz;
|
||
|
is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
|
||
|
if (is.fail() || vecsz < 0) goto bad;
|
||
|
v->resize(vecsz);
|
||
|
if (vecsz > 0) {
|
||
|
is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz);
|
||
|
}
|
||
|
} else {
|
||
|
std::vector<T> tmp_v; // use temporary so v doesn't use extra memory
|
||
|
// due to resizing.
|
||
|
is >> std::ws;
|
||
|
if (is.peek() != static_cast<int>('[')) {
|
||
|
KALDI_ERR << "ReadIntegerVector: expected to see [, saw "
|
||
|
<< is.peek() << ", at file position " << is.tellg();
|
||
|
}
|
||
|
is.get(); // consume the '['.
|
||
|
is >> std::ws; // consume whitespace.
|
||
|
while (is.peek() != static_cast<int>(']')) {
|
||
|
if (sizeof(T) == 1) { // read/write chars as numbers.
|
||
|
int16 next_t;
|
||
|
is >> next_t >> std::ws;
|
||
|
if (is.fail()) goto bad;
|
||
|
else
|
||
|
tmp_v.push_back((T)next_t);
|
||
|
} else {
|
||
|
T next_t;
|
||
|
is >> next_t >> std::ws;
|
||
|
if (is.fail()) goto bad;
|
||
|
else
|
||
|
tmp_v.push_back(next_t);
|
||
|
}
|
||
|
}
|
||
|
is.get(); // get the final ']'.
|
||
|
*v = tmp_v; // could use std::swap to use less temporary memory, but this
|
||
|
// uses less permanent memory.
|
||
|
}
|
||
|
if (!is.fail()) return;
|
||
|
bad:
|
||
|
KALDI_ERR << "ReadIntegerVector: read failure at file position "
|
||
|
<< is.tellg();
|
||
|
}
|
||
|
|
||
|
|
||
|
// Prepares an already-opened output stream for writing.  In binary mode the
// two-byte header "\0B" is emitted; in either mode the floating-point
// precision of the stream is raised to at least 7 digits.
// No error checking is done here and nothing is thrown by this function
// itself; callers must inspect the stream state if they care.
inline void InitKaldiOutputStream(std::ostream &os, bool binary) {
  if (binary) {
    // Binary header: a zero byte followed by 'B'.
    os.put('\0').put('B');
  }
  // 7 is a bit more than the precision of float.  A precision that is
  // already higher is left alone.
  const bool precision_too_low = os.precision() < 7;
  if (precision_too_low) {
    os.precision(7);
  }
}
|
||
|
|
||
|
/// Prepares an already-opened input stream for reading by looking for the
/// binary header "\0B" and setting "*binary" accordingly.
///  - If the stream does not start with '\0', nothing is consumed, "*binary"
///    is set to false and true is returned.
///  - If the stream starts with "\0B", both bytes are consumed, "*binary" is
///    set to true and true is returned.
///  - If the stream starts with '\0' that is not followed by 'B', the '\0' is
///    consumed, "*binary" is left untouched and false is returned (no
///    exception is thrown here).
inline bool InitKaldiInputStream(std::istream &is, bool *binary) {
  if (is.peek() != '\0') {  // no header: text mode.
    *binary = false;
    return true;
  }
  is.get();  // consume the '\0'; seems to be binary.
  if (is.peek() != 'B') {
    // Malformed header: report failure without setting *binary.
    return false;
  }
  is.get();  // consume the 'B'.
  *binary = true;
  return true;
}
|
||
|
|
||
|
} // end namespace kaldi.
|
||
|
|
||
|
#endif // KALDI_BASE_IO_FUNCS_INL_H_
|