You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
249 lines
7.7 KiB
249 lines
7.7 KiB
2 years ago
|
// feat/wave-reader.h
|
||
|
|
||
|
// Copyright 2009-2011 Karel Vesely; Microsoft Corporation
|
||
|
// 2013 Florent Masson
|
||
|
// 2013 Johns Hopkins University (author: Daniel Povey)
|
||
|
|
||
|
// See ../../COPYING for clarification regarding multiple authors
|
||
|
//
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
//
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
//
|
||
|
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||
|
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||
|
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||
|
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||
|
// See the Apache 2 License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
|
||
|
|
||
|
/*
|
||
|
// THE WAVE FORMAT IS SPECIFIED IN:
|
||
|
// https://ccrma.stanford.edu/courses/422/projects/WaveFormat/
|
||
|
//
|
||
|
//
|
||
|
//
|
||
|
// RIFF
|
||
|
// |
|
||
|
// WAVE
|
||
|
// |    \    \   \
|
||
|
// fmt_ data ... data
|
||
|
//
|
||
|
//
|
||
|
// Riff is a general container, which usually contains one WAVE chunk
|
||
|
// each WAVE chunk has header sub-chunk 'fmt_'
|
||
|
// and one or more data sub-chunks 'data'
|
||
|
//
|
||
|
// [Note from Dan: to say that the wave format was ever "specified" anywhere is
|
||
|
// not quite right. The guy who invented the wave format attempted to create
|
||
|
// a formal specification but it did not completely make sense. And there
|
||
|
// doesn't seem to be a consensus on what makes a valid wave file,
|
||
|
// particularly where the accuracy of header information is concerned.]
|
||
|
*/
|
||
|
|
||
|
|
||
|
#ifndef KALDI_FEAT_WAVE_READER_H_
|
||
|
#define KALDI_FEAT_WAVE_READER_H_
|
||
|
|
||
|
#include <cstring>
|
||
|
|
||
|
#include "base/kaldi-types.h"
|
||
|
#include "matrix/kaldi-matrix.h"
|
||
|
#include "matrix/kaldi-vector.h"
|
||
|
|
||
|
|
||
|
namespace kaldi {
|
||
|
|
||
|
/// For historical reasons, we scale waveforms to the range
|
||
|
/// (2^15-1)*[-1, 1], not the usual default DSP range [-1, 1].
|
||
|
// NOTE(review): the comment preceding this constant states the range as
// (2^15-1)*[-1, 1], whereas the value defined here is 2^15 = 32768 --
// confirm which bound is intended.
const BaseFloat kWaveSampleMax = 32768.0;
|
||
|
|
||
|
/// This class reads and hold wave file header information.
|
||
|
class WaveInfo {
|
||
|
public:
|
||
|
WaveInfo()
|
||
|
: samp_freq_(0), samp_count_(0), num_channels_(0), reverse_bytes_(0) {}
|
||
|
|
||
|
/// Is stream size unknown? Duration and SampleCount not valid if true.
|
||
|
bool IsStreamed() const { return samp_count_ < 0; }
|
||
|
|
||
|
/// Sample frequency, Hz.
|
||
|
BaseFloat SampFreq() const { return samp_freq_; }
|
||
|
|
||
|
/// Number of samples in stream. Invalid if IsStreamed() is true.
|
||
|
uint32 SampleCount() const { return samp_count_; }
|
||
|
|
||
|
/// Approximate duration, seconds. Invalid if IsStreamed() is true.
|
||
|
BaseFloat Duration() const { return samp_count_ / samp_freq_; }
|
||
|
|
||
|
/// Number of channels, 1 to 16.
|
||
|
int32 NumChannels() const { return num_channels_; }
|
||
|
|
||
|
/// Bytes per sample.
|
||
|
size_t BlockAlign() const { return 2 * num_channels_; }
|
||
|
|
||
|
/// Wave data bytes. Invalid if IsStreamed() is true.
|
||
|
size_t DataBytes() const { return samp_count_ * BlockAlign(); }
|
||
|
|
||
|
/// Is data file byte order different from machine byte order?
|
||
|
bool ReverseBytes() const { return reverse_bytes_; }
|
||
|
|
||
|
/// 'is' should be opened in binary mode. Read() will throw on error.
|
||
|
/// On success 'is' will be positioned at the beginning of wave data.
|
||
|
void Read(std::istream &is);
|
||
|
|
||
|
private:
|
||
|
BaseFloat samp_freq_;
|
||
|
int32 samp_count_; // 0 if empty, -1 if undefined length.
|
||
|
uint8 num_channels_;
|
||
|
bool reverse_bytes_; // File endianness differs from host.
|
||
|
};
|
||
|
|
||
|
/// This class's purpose is to read in Wave files.
|
||
|
class WaveData {
|
||
|
public:
|
||
|
WaveData(BaseFloat samp_freq, const MatrixBase<BaseFloat> &data)
|
||
|
: data_(data), samp_freq_(samp_freq) {}
|
||
|
|
||
|
WaveData() : samp_freq_(0.0) {}
|
||
|
|
||
|
/// Read() will throw on error. It's valid to call Read() more than once--
|
||
|
/// in this case it will destroy what was there before.
|
||
|
/// "is" should be opened in binary mode.
|
||
|
void Read(std::istream &is);
|
||
|
|
||
|
/// Write() will throw on error. os should be opened in binary mode.
|
||
|
void Write(std::ostream &os) const;
|
||
|
|
||
|
// This function returns the wave data-- it's in a matrix
|
||
|
// because there may be multiple channels. In the normal case
|
||
|
// there's just one channel so Data() will have one row.
|
||
|
const Matrix<BaseFloat> &Data() const { return data_; }
|
||
|
|
||
|
BaseFloat SampFreq() const { return samp_freq_; }
|
||
|
|
||
|
// Returns the duration in seconds
|
||
|
BaseFloat Duration() const { return data_.NumCols() / samp_freq_; }
|
||
|
|
||
|
void CopyFrom(const WaveData &other) {
|
||
|
samp_freq_ = other.samp_freq_;
|
||
|
data_.CopyFromMat(other.data_);
|
||
|
}
|
||
|
|
||
|
void Clear() {
|
||
|
data_.Resize(0, 0);
|
||
|
samp_freq_ = 0.0;
|
||
|
}
|
||
|
|
||
|
void Swap(WaveData *other) {
|
||
|
data_.Swap(&(other->data_));
|
||
|
std::swap(samp_freq_, other->samp_freq_);
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
static const uint32 kBlockSize = 1024 * 1024; // Use 1M bytes.
|
||
|
Matrix<BaseFloat> data_;
|
||
|
BaseFloat samp_freq_;
|
||
|
};
|
||
|
|
||
|
|
||
|
// Holder class for .wav files that enables us to read (but not write) .wav
|
||
|
// files. c.f. util/kaldi-holder.h we don't use the KaldiObjectHolder template
|
||
|
// because we don't want to check for the \0B binary header. We could have faked
|
||
|
// it by pretending to read in the wave data in text mode after failing to find
|
||
|
// the \0B header, but that would have been a little ugly.
|
||
|
class WaveHolder {
|
||
|
public:
|
||
|
typedef WaveData T;
|
||
|
|
||
|
static bool Write(std::ostream &os, bool binary, const T &t) {
|
||
|
// We don't write the binary-mode header here [always binary].
|
||
|
if (!binary)
|
||
|
KALDI_ERR << "Wave data can only be written in binary mode.";
|
||
|
try {
|
||
|
t.Write(os); // throws exception on failure.
|
||
|
return true;
|
||
|
} catch (const std::exception &e) {
|
||
|
KALDI_WARN << "Exception caught in WaveHolder object (writing). "
|
||
|
<< e.what();
|
||
|
return false; // write failure.
|
||
|
}
|
||
|
}
|
||
|
void Copy(const T &t) { t_.CopyFrom(t); }
|
||
|
|
||
|
static bool IsReadInBinary() { return true; }
|
||
|
|
||
|
void Clear() { t_.Clear(); }
|
||
|
|
||
|
T &Value() { return t_; }
|
||
|
|
||
|
WaveHolder &operator=(const WaveHolder &other) {
|
||
|
t_.CopyFrom(other.t_);
|
||
|
return *this;
|
||
|
}
|
||
|
WaveHolder(const WaveHolder &other) : t_(other.t_) {}
|
||
|
|
||
|
WaveHolder() {}
|
||
|
|
||
|
bool Read(std::istream &is) {
|
||
|
// We don't look for the binary-mode header here [always binary]
|
||
|
try {
|
||
|
t_.Read(is); // Throws exception on failure.
|
||
|
return true;
|
||
|
} catch (const std::exception &e) {
|
||
|
KALDI_WARN << "Exception caught in WaveHolder::Read(). "
|
||
|
<< e.what();
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void Swap(WaveHolder *other) { t_.Swap(&(other->t_)); }
|
||
|
|
||
|
bool ExtractRange(const WaveHolder &other, const std::string &range) {
|
||
|
KALDI_ERR << "ExtractRange is not defined for this type of holder.";
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
T t_;
|
||
|
};
|
||
|
|
||
|
// This is like WaveHolder but when you just want the metadata-
|
||
|
// it leaves the actual data undefined, it doesn't read it.
|
||
|
class WaveInfoHolder {
|
||
|
public:
|
||
|
typedef WaveInfo T;
|
||
|
|
||
|
void Clear() { info_ = WaveInfo(); }
|
||
|
void Swap(WaveInfoHolder *other) { std::swap(info_, other->info_); }
|
||
|
T &Value() { return info_; }
|
||
|
static bool IsReadInBinary() { return true; }
|
||
|
|
||
|
bool Read(std::istream &is) {
|
||
|
try {
|
||
|
info_.Read(is); // Throws exception on failure.
|
||
|
return true;
|
||
|
} catch (const std::exception &e) {
|
||
|
KALDI_WARN << "Exception caught in WaveInfoHolder::Read(). "
|
||
|
<< e.what();
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bool ExtractRange(const WaveInfoHolder &other, const std::string &range) {
|
||
|
KALDI_ERR << "ExtractRange is not defined for this type of holder.";
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
WaveInfo info_;
|
||
|
};
|
||
|
|
||
|
|
||
|
} // namespace kaldi
|
||
|
|
||
|
#endif // KALDI_FEAT_WAVE_READER_H_
|