You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
PaddleSpeech/runtime/engine/common/frontend/wave-reader.h

249 lines
7.7 KiB

// feat/wave-reader.h
// Copyright 2009-2011 Karel Vesely; Microsoft Corporation
// 2013 Florent Masson
// 2013 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
/*
// THE WAVE FORMAT IS SPECIFIED IN:
// https://ccrma.stanford.edu/courses/422/projects/WaveFormat/
//
//
//
// RIFF
// |
// WAVE
// | \ \ \
// fmt_ data ... data
//
//
// Riff is a general container, which usually contains one WAVE chunk
// each WAVE chunk has header sub-chunk 'fmt_'
// and one or more data sub-chunks 'data'
//
// [Note from Dan: to say that the wave format was ever "specified" anywhere is
// not quite right. The guy who invented the wave format attempted to create
// a formal specification but it did not completely make sense. And there
// doesn't seem to be a consensus on what makes a valid wave file,
// particularly where the accuracy of header information is concerned.]
*/
#ifndef KALDI_FEAT_WAVE_READER_H_
#define KALDI_FEAT_WAVE_READER_H_
#include <cstring>
#include "base/kaldi-types.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/kaldi-vector.h"
namespace kaldi {
/// For historical reasons, we scale waveforms to the range
/// (2^15-1)*[-1, 1], not the usual default DSP range [-1, 1].
/// Note that the constant below is 2^15 (32768), i.e. one more than the
/// 2^15-1 bound quoted above.
const BaseFloat kWaveSampleMax = 32768.0;
/// Holds the header information of a wave file (sampling frequency, sample
/// count, channel count, byte order) and declares Read() to fill it in from
/// a stream.
class WaveInfo {
 public:
  /// Creates an empty header: zero frequency, zero samples, zero channels
  /// and no byte reversal.
  WaveInfo()
      : samp_freq_(0),
        samp_count_(0),
        num_channels_(0),
        reverse_bytes_(false) {}

  /// True when the stream size is unknown (the stored sample count is
  /// negative). SampleCount(), Duration() and DataBytes() are not valid
  /// in that case.
  bool IsStreamed() const {
    return samp_count_ < 0;
  }

  /// Sample frequency, Hz.
  BaseFloat SampFreq() const {
    return samp_freq_;
  }

  /// Number of samples in stream. Invalid if IsStreamed() is true, because
  /// the negative internal count is converted to an unsigned value.
  uint32 SampleCount() const {
    return static_cast<uint32>(samp_count_);
  }

  /// Approximate duration, seconds. Invalid if IsStreamed() is true.
  BaseFloat Duration() const {
    return static_cast<BaseFloat>(samp_count_) / samp_freq_;
  }

  /// Number of channels, 1 to 16.
  int32 NumChannels() const {
    return num_channels_;
  }

  /// Bytes per sample, counted over all channels (two bytes per channel).
  size_t BlockAlign() const {
    const int bytes_per_block = 2 * num_channels_;
    return bytes_per_block;
  }

  /// Wave data bytes. Invalid if IsStreamed() is true.
  size_t DataBytes() const {
    return static_cast<size_t>(samp_count_) * BlockAlign();
  }

  /// Is data file byte order different from machine byte order?
  bool ReverseBytes() const {
    return reverse_bytes_;
  }

  /// 'is' should be opened in binary mode. Read() will throw on error.
  /// On success 'is' will be positioned at the beginning of wave data.
  void Read(std::istream &is);

 private:
  BaseFloat samp_freq_;
  int32 samp_count_;    // 0 if empty, -1 if undefined length.
  uint8 num_channels_;
  bool reverse_bytes_;  // File endianness differs from host.
};
/// Holds the contents of a wave file: the samples as a matrix (there may be
/// multiple channels; in the normal single-channel case the matrix has one
/// row) plus the sampling frequency. Read() and Write() move the data
/// from/to binary streams.
class WaveData {
 public:
  /// Builds a WaveData from a sampling frequency and a copy of 'data'.
  WaveData(BaseFloat samp_freq, const MatrixBase<BaseFloat> &data)
      : data_(data), samp_freq_(samp_freq) {}

  /// Builds an empty WaveData with sampling frequency 0.
  WaveData() : samp_freq_(0.0) {}

  /// Read() will throw on error. It's valid to call Read() more than once--
  /// in this case it will destroy what was there before.
  /// "is" should be opened in binary mode.
  void Read(std::istream &is);

  /// Write() will throw on error. os should be opened in binary mode.
  void Write(std::ostream &os) const;

  // This function returns the wave data-- it's in a matrix
  // because there may be multiple channels. In the normal case
  // there's just one channel so Data() will have one row.
  const Matrix<BaseFloat> &Data() const { return data_; }

  /// Sampling frequency as stored in this object.
  BaseFloat SampFreq() const { return samp_freq_; }

  // Returns the duration in seconds (number of columns divided by the
  // sampling frequency). Not meaningful while the sampling frequency is 0,
  // e.g. on a default-constructed or cleared object.
  BaseFloat Duration() const { return data_.NumCols() / samp_freq_; }

  /// Makes this object a copy of 'other' (sampling frequency and samples).
  /// Works for any current shape of this object, including empty.
  void CopyFrom(const WaveData &other) {
    // Self-copy is a no-op; bail out before Resize() touches our own data.
    if (this == &other) return;
    samp_freq_ = other.samp_freq_;
    // MatrixBase::CopyFromMat() expects the destination to already have the
    // source's dimensions (it does not resize), so size data_ first.
    data_.Resize(other.data_.NumRows(), other.data_.NumCols());
    data_.CopyFromMat(other.data_);
  }

  /// Releases the samples and resets the sampling frequency to 0.
  void Clear() {
    data_.Resize(0, 0);
    samp_freq_ = 0.0;
  }

  /// Exchanges the contents of this object with '*other'.
  void Swap(WaveData *other) {
    data_.Swap(&(other->data_));
    std::swap(samp_freq_, other->samp_freq_);
  }

 private:
  static const uint32 kBlockSize = 1024 * 1024;  // Use 1M bytes.
  Matrix<BaseFloat> data_;
  BaseFloat samp_freq_;
};
// Holder class for .wav files that enables us to read (but not write) .wav
// files. c.f. util/kaldi-holder.h we don't use the KaldiObjectHolder template
// because we don't want to check for the \0B binary header. We could have faked
// it by pretending to read in the wave data in text mode after failing to find
// the \0B header, but that would have been a little ugly.
class WaveHolder {
public:
typedef WaveData T;
static bool Write(std::ostream &os, bool binary, const T &t) {
// We don't write the binary-mode header here [always binary].
if (!binary)
KALDI_ERR << "Wave data can only be written in binary mode.";
try {
t.Write(os); // throws exception on failure.
return true;
} catch (const std::exception &e) {
KALDI_WARN << "Exception caught in WaveHolder object (writing). "
<< e.what();
return false; // write failure.
}
}
void Copy(const T &t) { t_.CopyFrom(t); }
static bool IsReadInBinary() { return true; }
void Clear() { t_.Clear(); }
T &Value() { return t_; }
WaveHolder &operator=(const WaveHolder &other) {
t_.CopyFrom(other.t_);
return *this;
}
WaveHolder(const WaveHolder &other) : t_(other.t_) {}
WaveHolder() {}
bool Read(std::istream &is) {
// We don't look for the binary-mode header here [always binary]
try {
t_.Read(is); // Throws exception on failure.
return true;
} catch (const std::exception &e) {
KALDI_WARN << "Exception caught in WaveHolder::Read(). "
<< e.what();
return false;
}
}
void Swap(WaveHolder *other) { t_.Swap(&(other->t_)); }
bool ExtractRange(const WaveHolder &other, const std::string &range) {
KALDI_ERR << "ExtractRange is not defined for this type of holder.";
return false;
}
private:
T t_;
};
// This is like WaveHolder but when you just want the metadata-
// it leaves the actual data undefined, it doesn't read it.
class WaveInfoHolder {
public:
typedef WaveInfo T;
void Clear() { info_ = WaveInfo(); }
void Swap(WaveInfoHolder *other) { std::swap(info_, other->info_); }
T &Value() { return info_; }
static bool IsReadInBinary() { return true; }
bool Read(std::istream &is) {
try {
info_.Read(is); // Throws exception on failure.
return true;
} catch (const std::exception &e) {
KALDI_WARN << "Exception caught in WaveInfoHolder::Read(). "
<< e.what();
return false;
}
}
bool ExtractRange(const WaveInfoHolder &other, const std::string &range) {
KALDI_ERR << "ExtractRange is not defined for this type of holder.";
return false;
}
private:
WaveInfo info_;
};
} // namespace kaldi
#endif // KALDI_FEAT_WAVE_READER_H_