// feat/wave-reader.h // Copyright 2009-2011 Karel Vesely; Microsoft Corporation // 2013 Florent Masson // 2013 Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. /* // THE WAVE FORMAT IS SPECIFIED IN: // https:// ccrma.stanford.edu/courses/422/projects/WaveFormat/ // // // // RIFF // | // WAVE // | \ \ \ // fmt_ data ... data // // // Riff is a general container, which usually contains one WAVE chunk // each WAVE chunk has header sub-chunk 'fmt_' // and one or more data sub-chunks 'data' // // [Note from Dan: to say that the wave format was ever "specified" anywhere is // not quite right. The guy who invented the wave format attempted to create // a formal specification but it did not completely make sense. And there // doesn't seem to be a consensus on what makes a valid wave file, // particularly where the accuracy of header information is concerned.] */ #ifndef KALDI_FEAT_WAVE_READER_H_ #define KALDI_FEAT_WAVE_READER_H_ #include #include "base/kaldi-types.h" #include "matrix/kaldi-matrix.h" #include "matrix/kaldi-vector.h" namespace kaldi { /// For historical reasons, we scale waveforms to the range /// (2^15-1)*[-1, 1], not the usual default DSP range [-1, 1]. const BaseFloat kWaveSampleMax = 32768.0; /// This class reads and hold wave file header information. class WaveInfo { public: WaveInfo() : samp_freq_(0), samp_count_(0), num_channels_(0), reverse_bytes_(0) {} /// Is stream size unknown? Duration and SampleCount not valid if true. bool IsStreamed() const { return samp_count_ < 0; } /// Sample frequency, Hz. BaseFloat SampFreq() const { return samp_freq_; } /// Number of samples in stream. Invalid if IsStreamed() is true. uint32 SampleCount() const { return samp_count_; } /// Approximate duration, seconds. Invalid if IsStreamed() is true. BaseFloat Duration() const { return samp_count_ / samp_freq_; } /// Number of channels, 1 to 16. int32 NumChannels() const { return num_channels_; } /// Bytes per sample. size_t BlockAlign() const { return 2 * num_channels_; } /// Wave data bytes. Invalid if IsStreamed() is true. size_t DataBytes() const { return samp_count_ * BlockAlign(); } /// Is data file byte order different from machine byte order? bool ReverseBytes() const { return reverse_bytes_; } /// 'is' should be opened in binary mode. Read() will throw on error. /// On success 'is' will be positioned at the beginning of wave data. void Read(std::istream &is); private: BaseFloat samp_freq_; int32 samp_count_; // 0 if empty, -1 if undefined length. uint8 num_channels_; bool reverse_bytes_; // File endianness differs from host. }; /// This class's purpose is to read in Wave files. class WaveData { public: WaveData(BaseFloat samp_freq, const MatrixBase &data) : data_(data), samp_freq_(samp_freq) {} WaveData() : samp_freq_(0.0) {} /// Read() will throw on error. It's valid to call Read() more than once-- /// in this case it will destroy what was there before. /// "is" should be opened in binary mode. void Read(std::istream &is); /// Write() will throw on error. os should be opened in binary mode. void Write(std::ostream &os) const; // This function returns the wave data-- it's in a matrix // because there may be multiple channels. In the normal case // there's just one channel so Data() will have one row. const Matrix &Data() const { return data_; } BaseFloat SampFreq() const { return samp_freq_; } // Returns the duration in seconds BaseFloat Duration() const { return data_.NumCols() / samp_freq_; } void CopyFrom(const WaveData &other) { samp_freq_ = other.samp_freq_; data_.CopyFromMat(other.data_); } void Clear() { data_.Resize(0, 0); samp_freq_ = 0.0; } void Swap(WaveData *other) { data_.Swap(&(other->data_)); std::swap(samp_freq_, other->samp_freq_); } private: static const uint32 kBlockSize = 1024 * 1024; // Use 1M bytes. Matrix data_; BaseFloat samp_freq_; }; // Holder class for .wav files that enables us to read (but not write) .wav // files. c.f. util/kaldi-holder.h we don't use the KaldiObjectHolder template // because we don't want to check for the \0B binary header. We could have faked // it by pretending to read in the wave data in text mode after failing to find // the \0B header, but that would have been a little ugly. class WaveHolder { public: typedef WaveData T; static bool Write(std::ostream &os, bool binary, const T &t) { // We don't write the binary-mode header here [always binary]. if (!binary) KALDI_ERR << "Wave data can only be written in binary mode."; try { t.Write(os); // throws exception on failure. return true; } catch (const std::exception &e) { KALDI_WARN << "Exception caught in WaveHolder object (writing). " << e.what(); return false; // write failure. } } void Copy(const T &t) { t_.CopyFrom(t); } static bool IsReadInBinary() { return true; } void Clear() { t_.Clear(); } T &Value() { return t_; } WaveHolder &operator=(const WaveHolder &other) { t_.CopyFrom(other.t_); return *this; } WaveHolder(const WaveHolder &other) : t_(other.t_) {} WaveHolder() {} bool Read(std::istream &is) { // We don't look for the binary-mode header here [always binary] try { t_.Read(is); // Throws exception on failure. return true; } catch (const std::exception &e) { KALDI_WARN << "Exception caught in WaveHolder::Read(). " << e.what(); return false; } } void Swap(WaveHolder *other) { t_.Swap(&(other->t_)); } bool ExtractRange(const WaveHolder &other, const std::string &range) { KALDI_ERR << "ExtractRange is not defined for this type of holder."; return false; } private: T t_; }; // This is like WaveHolder but when you just want the metadata- // it leaves the actual data undefined, it doesn't read it. class WaveInfoHolder { public: typedef WaveInfo T; void Clear() { info_ = WaveInfo(); } void Swap(WaveInfoHolder *other) { std::swap(info_, other->info_); } T &Value() { return info_; } static bool IsReadInBinary() { return true; } bool Read(std::istream &is) { try { info_.Read(is); // Throws exception on failure. return true; } catch (const std::exception &e) { KALDI_WARN << "Exception caught in WaveInfoHolder::Read(). " << e.what(); return false; } } bool ExtractRange(const WaveInfoHolder &other, const std::string &range) { KALDI_ERR << "ExtractRange is not defined for this type of holder."; return false; } private: WaveInfo info_; }; } // namespace kaldi #endif // KALDI_FEAT_WAVE_READER_H_