[audio]replace kaldi fbank with kaldi-native-fbank in paddleaudio (#2799)
* replace kaldi_fbank with kaldi-native-fbank in paddleaudio * fix macpull/2801/head
parent
964211a81b
commit
d7a6268bcc
@ -0,0 +1,22 @@
|
|||||||
|
# Put the directory two levels above this one on the include path; the sources
# include their headers as "kaldi-native-fbank/csrc/<name>.h".
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../)

# Core feature-extraction library.
add_library(kaldi-native-fbank-core
  feature-fbank.cc
  feature-functions.cc
  feature-window.cc
  fftsg.c
  log.cc
  mel-computations.cc
  rfft.cc
)

# log.h uses std::call_once(), which requires linking with -pthread.
if(NOT WIN32)
  target_link_libraries(kaldi-native-fbank-core -pthread)
endif()

# Forward each optional-header flag to the sources as a private compile
# definition, but only when the flag is set by the enclosing project.
foreach(knf_feature_flag KNF_HAVE_EXECINFO_H KNF_HAVE_CXXABI_H)
  if(${knf_feature_flag})
    target_compile_definitions(kaldi-native-fbank-core PRIVATE ${knf_feature_flag}=1)
  endif()
endforeach()
|
@ -0,0 +1,117 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
*
|
||||||
|
* See LICENSE for clarification regarding multiple authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// This file is copied/modified from kaldi/src/feat/feature-fbank.cc
|
||||||
|
//
|
||||||
|
#include "kaldi-native-fbank/csrc/feature-fbank.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "kaldi-native-fbank/csrc/feature-functions.h"
|
||||||
|
|
||||||
|
namespace knf {
|
||||||
|
|
||||||
|
// Replaces each of the first `n` elements of `in_out` with its square root.
static void Sqrt(float *in_out, int32_t n) {
  float *const end = in_out + n;
  for (float *cur = in_out; cur != end; ++cur) {
    *cur = std::sqrt(*cur);
  }
}
|
||||||
|
|
||||||
|
// Streams the textual form produced by FbankOptions::ToString().
std::ostream &operator<<(std::ostream &os, const FbankOptions &opts) {
  return os << opts.ToString();
}
|
||||||
|
|
||||||
|
// Builds a computer for the given options: sizes the real FFT for the padded
// window and pre-builds the mel filterbank for the unwarped case.
FbankComputer::FbankComputer(const FbankOptions &opts)
    : opts_(opts), rfft_(opts.frame_opts.PaddedWindowSize()) {
  // Always give the member a definite value. It is only consulted by
  // Compute() when energy_floor > 0, but leaving it indeterminate makes
  // the object unsafe to inspect or log.
  log_energy_floor_ = 0.0f;
  if (opts.energy_floor > 0.0f) {
    log_energy_floor_ = std::log(opts.energy_floor);
  }

  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
  // [note: this call caches it.]
  GetMelBanks(1.0f);
}
|
||||||
|
|
||||||
|
// Frees every MelBanks instance cached in mel_banks_.
FbankComputer::~FbankComputer() {
  for (auto &entry : mel_banks_) {
    delete entry.second;
  }
}
|
||||||
|
|
||||||
|
// Returns the mel filterbank for `vtln_warp`, creating and caching it in
// mel_banks_ on first request. The returned object is released by the
// destructor, which deletes every cached entry.
const MelBanks *FbankComputer::GetMelBanks(float vtln_warp) {
  const auto cached = mel_banks_.find(vtln_warp);
  if (cached != mel_banks_.end()) {
    return cached->second;
  }

  MelBanks *created =
      new MelBanks(opts_.mel_opts, opts_.frame_opts, vtln_warp);
  mel_banks_[vtln_warp] = created;
  return created;
}
|
||||||
|
|
||||||
|
void FbankComputer::Compute(float signal_raw_log_energy, float vtln_warp,
|
||||||
|
std::vector<float> *signal_frame, float *feature) {
|
||||||
|
const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));
|
||||||
|
|
||||||
|
KNF_CHECK_EQ(signal_frame->size(), opts_.frame_opts.PaddedWindowSize());
|
||||||
|
|
||||||
|
// Compute energy after window function (not the raw one).
|
||||||
|
if (opts_.use_energy && !opts_.raw_energy) {
|
||||||
|
signal_raw_log_energy = std::log(
|
||||||
|
std::max<float>(InnerProduct(signal_frame->data(), signal_frame->data(),
|
||||||
|
signal_frame->size()),
|
||||||
|
std::numeric_limits<float>::epsilon()));
|
||||||
|
}
|
||||||
|
rfft_.Compute(signal_frame->data()); // signal_frame is modified in-place
|
||||||
|
ComputePowerSpectrum(signal_frame);
|
||||||
|
|
||||||
|
// Use magnitude instead of power if requested.
|
||||||
|
if (!opts_.use_power) {
|
||||||
|
Sqrt(signal_frame->data(), signal_frame->size() / 2 + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0);
|
||||||
|
|
||||||
|
// Its length is opts_.mel_opts.num_bins
|
||||||
|
float *mel_energies = feature + mel_offset;
|
||||||
|
|
||||||
|
// Sum with mel filter banks over the power spectrum
|
||||||
|
mel_banks.Compute(signal_frame->data(), mel_energies);
|
||||||
|
|
||||||
|
if (opts_.use_log_fbank) {
|
||||||
|
// Avoid log of zero (which should be prevented anyway by dithering).
|
||||||
|
for (int32_t i = 0; i != opts_.mel_opts.num_bins; ++i) {
|
||||||
|
auto t = std::max(mel_energies[i], std::numeric_limits<float>::epsilon());
|
||||||
|
mel_energies[i] = std::log(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy energy as first value (or the last, if htk_compat == true).
|
||||||
|
if (opts_.use_energy) {
|
||||||
|
if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) {
|
||||||
|
signal_raw_log_energy = log_energy_floor_;
|
||||||
|
}
|
||||||
|
int32_t energy_index = opts_.htk_compat ? opts_.mel_opts.num_bins : 0;
|
||||||
|
feature[energy_index] = signal_raw_log_energy;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace knf
|
@ -0,0 +1,132 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
*
|
||||||
|
* See LICENSE for clarification regarding multiple authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// This file is copied/modified from kaldi/src/feat/feature-fbank.h
|
||||||
|
|
||||||
|
#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
|
||||||
|
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
#include "kaldi-native-fbank/csrc/feature-window.h"
|
||||||
|
#include "kaldi-native-fbank/csrc/mel-computations.h"
|
||||||
|
#include "kaldi-native-fbank/csrc/rfft.h"
|
||||||
|
|
||||||
|
namespace knf {
|
||||||
|
|
||||||
|
struct FbankOptions {
|
||||||
|
FrameExtractionOptions frame_opts;
|
||||||
|
MelBanksOptions mel_opts;
|
||||||
|
// append an extra dimension with energy to the filter banks
|
||||||
|
bool use_energy = false;
|
||||||
|
float energy_floor = 0.0f; // active iff use_energy==true
|
||||||
|
|
||||||
|
// If true, compute log_energy before preemphasis and windowing
|
||||||
|
// If false, compute log_energy after preemphasis ans windowing
|
||||||
|
bool raw_energy = true; // active iff use_energy==true
|
||||||
|
|
||||||
|
// If true, put energy last (if using energy)
|
||||||
|
// If false, put energy first
|
||||||
|
bool htk_compat = false; // active iff use_energy==true
|
||||||
|
|
||||||
|
// if true (default), produce log-filterbank, else linear
|
||||||
|
bool use_log_fbank = true;
|
||||||
|
|
||||||
|
// if true (default), use power in filterbank
|
||||||
|
// analysis, else magnitude.
|
||||||
|
bool use_power = true;
|
||||||
|
|
||||||
|
FbankOptions() { mel_opts.num_bins = 23; }
|
||||||
|
|
||||||
|
std::string ToString() const {
|
||||||
|
std::ostringstream os;
|
||||||
|
os << "frame_opts: \n";
|
||||||
|
os << frame_opts << "\n";
|
||||||
|
os << "\n";
|
||||||
|
|
||||||
|
os << "mel_opts: \n";
|
||||||
|
os << mel_opts << "\n";
|
||||||
|
|
||||||
|
os << "use_energy: " << use_energy << "\n";
|
||||||
|
os << "energy_floor: " << energy_floor << "\n";
|
||||||
|
os << "raw_energy: " << raw_energy << "\n";
|
||||||
|
os << "htk_compat: " << htk_compat << "\n";
|
||||||
|
os << "use_log_fbank: " << use_log_fbank << "\n";
|
||||||
|
os << "use_power: " << use_power << "\n";
|
||||||
|
return os.str();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
std::ostream &operator<<(std::ostream &os, const FbankOptions &opts);
|
||||||
|
|
||||||
|
class FbankComputer {
|
||||||
|
public:
|
||||||
|
using Options = FbankOptions;
|
||||||
|
|
||||||
|
explicit FbankComputer(const FbankOptions &opts);
|
||||||
|
~FbankComputer();
|
||||||
|
|
||||||
|
int32_t Dim() const {
|
||||||
|
return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// if true, compute log_energy_pre_window but after dithering and dc removal
|
||||||
|
bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
|
||||||
|
|
||||||
|
const FrameExtractionOptions &GetFrameOptions() const {
|
||||||
|
return opts_.frame_opts;
|
||||||
|
}
|
||||||
|
|
||||||
|
const FbankOptions &GetOptions() const { return opts_; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
Function that computes one frame of features from
|
||||||
|
one frame of signal.
|
||||||
|
|
||||||
|
@param [in] signal_raw_log_energy The log-energy of the frame of the signal
|
||||||
|
prior to windowing and pre-emphasis, or
|
||||||
|
log(numeric_limits<float>::min()), whichever is greater. Must be
|
||||||
|
ignored by this function if this class returns false from
|
||||||
|
this->NeedsRawLogEnergy().
|
||||||
|
@param [in] vtln_warp The VTLN warping factor that the user wants
|
||||||
|
to be applied when computing features for this utterance. Will
|
||||||
|
normally be 1.0, meaning no warping is to be done. The value will
|
||||||
|
be ignored for feature types that don't support VLTN, such as
|
||||||
|
spectrogram features.
|
||||||
|
@param [in] signal_frame One frame of the signal,
|
||||||
|
as extracted using the function ExtractWindow() using the options
|
||||||
|
returned by this->GetFrameOptions(). The function will use the
|
||||||
|
vector as a workspace, which is why it's a non-const pointer.
|
||||||
|
@param [out] feature Pointer to a vector of size this->Dim(), to which
|
||||||
|
the computed feature will be written. It should be pre-allocated.
|
||||||
|
*/
|
||||||
|
void Compute(float signal_raw_log_energy, float vtln_warp,
|
||||||
|
std::vector<float> *signal_frame, float *feature);
|
||||||
|
|
||||||
|
private:
|
||||||
|
const MelBanks *GetMelBanks(float vtln_warp);
|
||||||
|
|
||||||
|
FbankOptions opts_;
|
||||||
|
float log_energy_floor_;
|
||||||
|
std::map<float, MelBanks *> mel_banks_; // float is VTLN coefficient.
|
||||||
|
Rfft rfft_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace knf
|
||||||
|
|
||||||
|
#endif // KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
|
@ -0,0 +1,49 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
*
|
||||||
|
* See LICENSE for clarification regarding multiple authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// This file is copied/modified from kaldi/src/feat/feature-functions.cc
|
||||||
|
|
||||||
|
#include "kaldi-native-fbank/csrc/feature-functions.h"
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace knf {
|
||||||
|
|
||||||
|
// Converts, in place, a packed half-spectrum of a real FFT into a power
// spectrum.
//
// On input the vector of size n is laid out as
//   [real0, realN/2, real1, im1, real2, im2, ...].
// On output the first n/2 + 1 elements hold the energies of bins 0..n/2;
// the remaining elements are left with unspecified contents.
//
// Vectors too short to hold the packed layout are handled explicitly:
// an empty vector is left untouched, and a single-element vector is treated
// as a lone DC term and squared.
void ComputePowerSpectrum(std::vector<float> *complex_fft) {
  const int32_t dim = static_cast<int32_t>(complex_fft->size());
  if (dim == 0) {
    return;
  }

  float *p = complex_fft->data();
  if (dim == 1) {
    // There is no p[1] to read; only the DC term exists.
    p[0] = p[0] * p[0];
    return;
  }

  const int32_t half_dim = dim / 2;

  // Bins 0 and N/2 are purely real and share the first two slots, so save
  // their energies before the loop starts overwriting the front of the array.
  const float first_energy = p[0] * p[0];
  const float last_energy = p[1] * p[1];

  for (int32_t i = 1; i < half_dim; ++i) {
    const float real = p[i * 2];
    const float im = p[i * 2 + 1];
    p[i] = real * real + im * im;
  }

  p[0] = first_energy;
  // Will actually never be used, and anyway if the signal has been
  // bandlimited sensibly this should be zero.
  p[half_dim] = last_energy;
}
|
||||||
|
|
||||||
|
} // namespace knf
|
@ -0,0 +1,38 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
*
|
||||||
|
* See LICENSE for clarification regarding multiple authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// This file is copied/modified from kaldi/src/feat/feature-functions.h
|
||||||
|
#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H
|
||||||
|
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
namespace knf {
|
||||||
|
|
||||||
|
// ComputePowerSpectrum converts a complex FFT (as produced by the FFT
|
||||||
|
// functions in csrc/rfft.h) into
|
||||||
|
// a power spectrum. If the complex FFT is a vector of size n (representing
|
||||||
|
// half of the complex FFT of a real signal of size n, as described there),
|
||||||
|
// this function computes in the first (n/2) + 1 elements of it, the
|
||||||
|
// energies of the fft bins from zero to the Nyquist frequency. Contents of the
|
||||||
|
// remaining (n/2) - 1 elements are undefined at output.
|
||||||
|
|
||||||
|
void ComputePowerSpectrum(std::vector<float> *complex_fft);
|
||||||
|
|
||||||
|
} // namespace knf
|
||||||
|
|
||||||
|
#endif // KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H
|
@ -0,0 +1,236 @@
|
|||||||
|
// kaldi-native-fbank/csrc/feature-window.cc
|
||||||
|
//
|
||||||
|
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
// This file is copied/modified from kaldi/src/feat/feature-window.cc
|
||||||
|
|
||||||
|
#include "kaldi-native-fbank/csrc/feature-window.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#ifndef M_2PI
|
||||||
|
#define M_2PI 6.283185307179586476925286766559005
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace knf {
|
||||||
|
|
||||||
|
// Streams the textual form produced by FrameExtractionOptions::ToString().
std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts) {
  return os << opts.ToString();
}
|
||||||
|
|
||||||
|
// Precomputes the window coefficients for opts.window_type over
// opts.WindowSize() samples. An unrecognised window type is reported through
// KNF_LOG(FATAL).
//
// NOTE(review): with WindowSize() == 1 the step `a` is a division by zero;
// confirm that callers never configure a one-sample window.
FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts)
    : window_(opts.WindowSize()) {
  const int32_t frame_length = opts.WindowSize();
  KNF_CHECK_GT(frame_length, 0);

  const auto &type = opts.window_type;
  float *coeffs = window_.data();

  // Angular step such that the last sample lands exactly on 2*pi.
  const double a = M_2PI / (frame_length - 1);

  for (int32_t i = 0; i < frame_length; i++) {
    const double i_fl = static_cast<double>(i);
    double value;

    if (type == "hanning") {
      value = 0.5 - 0.5 * std::cos(a * i_fl);
    } else if (type == "sine") {
      // When comparing against Wikipedia, note that
      // 0.5 * a = M_PI / (frame_length - 1).
      value = std::sin(0.5 * a * i_fl);
    } else if (type == "hamming") {
      value = 0.54 - 0.46 * std::cos(a * i_fl);
    } else if (type == "povey") {
      // Like hamming but goes to zero at the edges.
      value = std::pow(0.5 - 0.5 * std::cos(a * i_fl), 0.85);
    } else if (type == "rectangular") {
      value = 1.0;
    } else if (type == "blackman") {
      value = opts.blackman_coeff - 0.5 * std::cos(a * i_fl) +
              (0.5 - opts.blackman_coeff) * std::cos(2 * a * i_fl);
    } else {
      KNF_LOG(FATAL) << "Invalid window type " << opts.window_type;
      continue;  // nothing to store for an unknown window type
    }

    coeffs[i] = value;
  }
}
|
||||||
|
|
||||||
|
void FeatureWindowFunction::Apply(float *wave) const {
|
||||||
|
int32_t window_size = window_.size();
|
||||||
|
const float *p = window_.data();
|
||||||
|
for (int32_t k = 0; k != window_size; ++k) {
|
||||||
|
wave[k] *= p[k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts) {
|
||||||
|
int64_t frame_shift = opts.WindowShift();
|
||||||
|
if (opts.snip_edges) {
|
||||||
|
return frame * frame_shift;
|
||||||
|
} else {
|
||||||
|
int64_t midpoint_of_frame = frame_shift * frame + frame_shift / 2,
|
||||||
|
beginning_of_frame = midpoint_of_frame - opts.WindowSize() / 2;
|
||||||
|
return beginning_of_frame;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
|
||||||
|
bool flush /*= true*/) {
|
||||||
|
int64_t frame_shift = opts.WindowShift();
|
||||||
|
int64_t frame_length = opts.WindowSize();
|
||||||
|
if (opts.snip_edges) {
|
||||||
|
// with --snip-edges=true (the default), we use a HTK-like approach to
|
||||||
|
// determining the number of frames-- all frames have to fit completely into
|
||||||
|
// the waveform, and the first frame begins at sample zero.
|
||||||
|
if (num_samples < frame_length)
|
||||||
|
return 0;
|
||||||
|
else
|
||||||
|
return (1 + ((num_samples - frame_length) / frame_shift));
|
||||||
|
// You can understand the expression above as follows: 'num_samples -
|
||||||
|
// frame_length' is how much room we have to shift the frame within the
|
||||||
|
// waveform; 'frame_shift' is how much we shift it each time; and the ratio
|
||||||
|
// is how many times we can shift it (integer arithmetic rounds down).
|
||||||
|
} else {
|
||||||
|
// if --snip-edges=false, the number of frames is determined by rounding the
|
||||||
|
// (file-length / frame-shift) to the nearest integer. The point of this
|
||||||
|
// formula is to make the number of frames an obvious and predictable
|
||||||
|
// function of the frame shift and signal length, which makes many
|
||||||
|
// segmentation-related questions simpler.
|
||||||
|
//
|
||||||
|
// Because integer division in C++ rounds toward zero, we add (half the
|
||||||
|
// frame-shift minus epsilon) before dividing, to have the effect of
|
||||||
|
// rounding towards the closest integer.
|
||||||
|
int32_t num_frames = (num_samples + (frame_shift / 2)) / frame_shift;
|
||||||
|
|
||||||
|
if (flush) return num_frames;
|
||||||
|
|
||||||
|
// note: 'end' always means the last plus one, i.e. one past the last.
|
||||||
|
int64_t end_sample_of_last_frame =
|
||||||
|
FirstSampleOfFrame(num_frames - 1, opts) + frame_length;
|
||||||
|
|
||||||
|
// the following code is optimized more for clarity than efficiency.
|
||||||
|
// If flush == false, we can't output frames that extend past the end
|
||||||
|
// of the signal.
|
||||||
|
while (num_frames > 0 && end_sample_of_last_frame > num_samples) {
|
||||||
|
num_frames--;
|
||||||
|
end_sample_of_last_frame -= frame_shift;
|
||||||
|
}
|
||||||
|
return num_frames;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ExtractWindow(int64_t sample_offset, const std::vector<float> &wave,
|
||||||
|
int32_t f, const FrameExtractionOptions &opts,
|
||||||
|
const FeatureWindowFunction &window_function,
|
||||||
|
std::vector<float> *window,
|
||||||
|
float *log_energy_pre_window /*= nullptr*/) {
|
||||||
|
KNF_CHECK(sample_offset >= 0 && wave.size() != 0);
|
||||||
|
|
||||||
|
int32_t frame_length = opts.WindowSize();
|
||||||
|
int32_t frame_length_padded = opts.PaddedWindowSize();
|
||||||
|
|
||||||
|
int64_t num_samples = sample_offset + wave.size();
|
||||||
|
int64_t start_sample = FirstSampleOfFrame(f, opts);
|
||||||
|
int64_t end_sample = start_sample + frame_length;
|
||||||
|
|
||||||
|
if (opts.snip_edges) {
|
||||||
|
KNF_CHECK(start_sample >= sample_offset && end_sample <= num_samples);
|
||||||
|
} else {
|
||||||
|
KNF_CHECK(sample_offset == 0 || start_sample >= sample_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (window->size() != frame_length_padded) {
|
||||||
|
window->resize(frame_length_padded);
|
||||||
|
}
|
||||||
|
|
||||||
|
// wave_start and wave_end are start and end indexes into 'wave', for the
|
||||||
|
// piece of wave that we're trying to extract.
|
||||||
|
int32_t wave_start = int32_t(start_sample - sample_offset);
|
||||||
|
int32_t wave_end = wave_start + frame_length;
|
||||||
|
|
||||||
|
if (wave_start >= 0 && wave_end <= wave.size()) {
|
||||||
|
// the normal case-- no edge effects to consider.
|
||||||
|
std::copy(wave.begin() + wave_start,
|
||||||
|
wave.begin() + wave_start + frame_length, window->data());
|
||||||
|
} else {
|
||||||
|
// Deal with any end effects by reflection, if needed. This code will only
|
||||||
|
// be reached for about two frames per utterance, so we don't concern
|
||||||
|
// ourselves excessively with efficiency.
|
||||||
|
int32_t wave_dim = wave.size();
|
||||||
|
for (int32_t s = 0; s < frame_length; ++s) {
|
||||||
|
int32_t s_in_wave = s + wave_start;
|
||||||
|
while (s_in_wave < 0 || s_in_wave >= wave_dim) {
|
||||||
|
// reflect around the beginning or end of the wave.
|
||||||
|
// e.g. -1 -> 0, -2 -> 1.
|
||||||
|
// dim -> dim - 1, dim + 1 -> dim - 2.
|
||||||
|
// the code supports repeated reflections, although this
|
||||||
|
// would only be needed in pathological cases.
|
||||||
|
if (s_in_wave < 0)
|
||||||
|
s_in_wave = -s_in_wave - 1;
|
||||||
|
else
|
||||||
|
s_in_wave = 2 * wave_dim - 1 - s_in_wave;
|
||||||
|
}
|
||||||
|
(*window)[s] = wave[s_in_wave];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ProcessWindow(opts, window_function, window->data(), log_energy_pre_window);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Subtracts the mean of the first `n` samples of `d` from each of them.
static void RemoveDcOffset(float *d, int32_t n) {
  float total = 0;
  for (int32_t k = 0; k != n; ++k) {
    total += d[k];
  }

  const float mean = total / n;

  for (int32_t k = 0; k != n; ++k) {
    d[k] = d[k] - mean;
  }
}
|
||||||
|
|
||||||
|
// Returns the dot product of the first `n` elements of `a` and `b`,
// accumulated in single precision from index 0 upwards.
float InnerProduct(const float *a, const float *b, int32_t n) {
  float acc = 0;
  int32_t k = 0;
  while (k != n) {
    acc += a[k] * b[k];
    ++k;
  }
  return acc;
}
|
||||||
|
|
||||||
|
// Applies pre-emphasis in place: d[i] -= preemph_coeff * d[i - 1].
// The first sample has no predecessor, so it is scaled against itself.
// A coefficient of exactly 0 is a no-op; otherwise it must lie in [0, 1].
static void Preemphasize(float *d, int32_t n, float preemph_coeff) {
  if (preemph_coeff == 0.0) {
    return;
  }

  KNF_CHECK(preemph_coeff >= 0.0 && preemph_coeff <= 1.0);

  // Walk backwards so that every step still sees the unmodified predecessor.
  int32_t idx = n - 1;
  while (idx > 0) {
    d[idx] -= preemph_coeff * d[idx - 1];
    --idx;
  }
  d[0] -= preemph_coeff * d[0];
}
|
||||||
|
|
||||||
|
void ProcessWindow(const FrameExtractionOptions &opts,
|
||||||
|
const FeatureWindowFunction &window_function, float *window,
|
||||||
|
float *log_energy_pre_window /*= nullptr*/) {
|
||||||
|
int32_t frame_length = opts.WindowSize();
|
||||||
|
|
||||||
|
// TODO(fangjun): Remove dither
|
||||||
|
KNF_CHECK_EQ(opts.dither, 0);
|
||||||
|
|
||||||
|
if (opts.remove_dc_offset) {
|
||||||
|
RemoveDcOffset(window, frame_length);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (log_energy_pre_window != NULL) {
|
||||||
|
float energy = std::max<float>(InnerProduct(window, window, frame_length),
|
||||||
|
std::numeric_limits<float>::epsilon());
|
||||||
|
*log_energy_pre_window = std::log(energy);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (opts.preemph_coeff != 0.0) {
|
||||||
|
Preemphasize(window, frame_length, opts.preemph_coeff);
|
||||||
|
}
|
||||||
|
|
||||||
|
window_function.Apply(window);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace knf
|
@ -0,0 +1,178 @@
|
|||||||
|
// kaldi-native-fbank/csrc/feature-window.h
|
||||||
|
//
|
||||||
|
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
// This file is copied/modified from kaldi/src/feat/feature-window.h
|
||||||
|
|
||||||
|
#ifndef KALDI_NATIVE_FEAT_CSRC_FEATURE_WINDOW_H_
|
||||||
|
#define KALDI_NATIVE_FEAT_CSRC_FEATURE_WINDOW_H_
|
||||||
|
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "kaldi-native-fbank/csrc/log.h"
|
||||||
|
|
||||||
|
namespace knf {
|
||||||
|
|
||||||
|
// Returns the smallest power of two that is >= n; n must be positive.
// Copied from kaldi/src/base/kaldi-math.cc.
inline int32_t RoundUpToNearestPowerOfTwo(int32_t n) {
  KNF_CHECK_GT(n, 0);
  n--;
  // Smear the highest set bit into every lower position.
  for (int32_t shift = 1; shift <= 16; shift *= 2) {
    n |= n >> shift;
  }
  return n + 1;
}
|
||||||
|
|
||||||
|
struct FrameExtractionOptions {
|
||||||
|
float samp_freq = 16000;
|
||||||
|
float frame_shift_ms = 10.0f; // in milliseconds.
|
||||||
|
float frame_length_ms = 25.0f; // in milliseconds.
|
||||||
|
float dither = 1.0f; // Amount of dithering, 0.0 means no dither.
|
||||||
|
float preemph_coeff = 0.97f; // Preemphasis coefficient.
|
||||||
|
bool remove_dc_offset = true; // Subtract mean of wave before FFT.
|
||||||
|
std::string window_type = "povey"; // e.g. Hamming window
|
||||||
|
// May be "hamming", "rectangular", "povey", "hanning", "sine", "blackman"
|
||||||
|
// "povey" is a window I made to be similar to Hamming but to go to zero at
|
||||||
|
// the edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85) I just don't think the
|
||||||
|
// Hamming window makes sense as a windowing function.
|
||||||
|
bool round_to_power_of_two = true;
|
||||||
|
float blackman_coeff = 0.42f;
|
||||||
|
bool snip_edges = true;
|
||||||
|
// bool allow_downsample = false;
|
||||||
|
// bool allow_upsample = false;
|
||||||
|
|
||||||
|
// Used for streaming feature extraction. It indicates the number
|
||||||
|
// of feature frames to keep in the recycling vector. -1 means to
|
||||||
|
// keep all feature frames.
|
||||||
|
int32_t max_feature_vectors = -1;
|
||||||
|
|
||||||
|
int32_t WindowShift() const {
|
||||||
|
return static_cast<int32_t>(samp_freq * 0.001f * frame_shift_ms);
|
||||||
|
}
|
||||||
|
int32_t WindowSize() const {
|
||||||
|
return static_cast<int32_t>(samp_freq * 0.001f * frame_length_ms);
|
||||||
|
}
|
||||||
|
int32_t PaddedWindowSize() const {
|
||||||
|
return (round_to_power_of_two ? RoundUpToNearestPowerOfTwo(WindowSize())
|
||||||
|
: WindowSize());
|
||||||
|
}
|
||||||
|
std::string ToString() const {
|
||||||
|
std::ostringstream os;
|
||||||
|
#define KNF_PRINT(x) os << #x << ": " << x << "\n"
|
||||||
|
KNF_PRINT(samp_freq);
|
||||||
|
KNF_PRINT(frame_shift_ms);
|
||||||
|
KNF_PRINT(frame_length_ms);
|
||||||
|
KNF_PRINT(dither);
|
||||||
|
KNF_PRINT(preemph_coeff);
|
||||||
|
KNF_PRINT(remove_dc_offset);
|
||||||
|
KNF_PRINT(window_type);
|
||||||
|
KNF_PRINT(round_to_power_of_two);
|
||||||
|
KNF_PRINT(blackman_coeff);
|
||||||
|
KNF_PRINT(snip_edges);
|
||||||
|
// KNF_PRINT(allow_downsample);
|
||||||
|
// KNF_PRINT(allow_upsample);
|
||||||
|
KNF_PRINT(max_feature_vectors);
|
||||||
|
#undef KNF_PRINT
|
||||||
|
return os.str();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts);
|
||||||
|
|
||||||
|
class FeatureWindowFunction {
|
||||||
|
public:
|
||||||
|
FeatureWindowFunction() = default;
|
||||||
|
explicit FeatureWindowFunction(const FrameExtractionOptions &opts);
|
||||||
|
/**
|
||||||
|
* @param wave Pointer to a 1-D array of shape [window_size].
|
||||||
|
* It is modified in-place: wave[i] = wave[i] * window_[i].
|
||||||
|
* @param
|
||||||
|
*/
|
||||||
|
void Apply(float *wave) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<float> window_; // of size opts.WindowSize()
|
||||||
|
};
|
||||||
|
|
||||||
|
int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts);
|
||||||
|
|
||||||
|
/**
|
||||||
|
This function returns the number of frames that we can extract from a wave
|
||||||
|
file with the given number of samples in it (assumed to have the same
|
||||||
|
sampling rate as specified in 'opts').
|
||||||
|
|
||||||
|
@param [in] num_samples The number of samples in the wave file.
|
||||||
|
@param [in] opts The frame-extraction options class
|
||||||
|
|
||||||
|
@param [in] flush True if we are asserting that this number of samples
|
||||||
|
is 'all there is', false if we are expecting more data to possibly come in. This
|
||||||
|
only makes a difference to the answer
|
||||||
|
if opts.snip_edges == false. For offline feature extraction you always want
|
||||||
|
flush == true. In an online-decoding context, once you know (or decide) that
|
||||||
|
no more data is coming in, you'd call it with flush == true at the end to
|
||||||
|
flush out any remaining data.
|
||||||
|
*/
|
||||||
|
int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
|
||||||
|
bool flush = true);
|
||||||
|
|
||||||
|
/*
|
||||||
|
ExtractWindow() extracts a windowed frame of waveform (possibly with a
|
||||||
|
power-of-two, padded size, depending on the config), including all the
|
||||||
|
processing done by ProcessWindow().
|
||||||
|
|
||||||
|
@param [in] sample_offset If 'wave' is not the entire waveform, but
|
||||||
|
part of it to the left has been discarded, then the
|
||||||
|
number of samples prior to 'wave' that we have
|
||||||
|
already discarded. Set this to zero if you are
|
||||||
|
processing the entire waveform in one piece, or
|
||||||
|
if you get 'no matching function' compilation
|
||||||
|
errors when updating the code.
|
||||||
|
@param [in] wave The waveform
|
||||||
|
@param [in] f The frame index to be extracted, with
|
||||||
|
0 <= f < NumFrames(sample_offset + wave.size(), opts, true)
|
||||||
|
@param [in] opts The options class to be used
|
||||||
|
@param [in] window_function The windowing function, as derived from the
|
||||||
|
options class.
|
||||||
|
@param [out] window The windowed, possibly-padded waveform to be
|
||||||
|
extracted. Will be resized as needed.
|
||||||
|
@param [out] log_energy_pre_window If non-NULL, the log-energy of
|
||||||
|
the signal prior to pre-emphasis and multiplying by
|
||||||
|
the windowing function will be written to here.
|
||||||
|
*/
|
||||||
|
void ExtractWindow(int64_t sample_offset, const std::vector<float> &wave,
|
||||||
|
int32_t f, const FrameExtractionOptions &opts,
|
||||||
|
const FeatureWindowFunction &window_function,
|
||||||
|
std::vector<float> *window,
|
||||||
|
float *log_energy_pre_window = nullptr);
|
||||||
|
|
||||||
|
/**
|
||||||
|
This function does all the windowing steps after actually
|
||||||
|
extracting the windowed signal: depending on the
|
||||||
|
configuration, it does dithering, dc offset removal,
|
||||||
|
preemphasis, and multiplication by the windowing function.
|
||||||
|
@param [in] opts The options class to be used
|
||||||
|
@param [in] window_function The windowing function-- should have
|
||||||
|
been initialized using 'opts'.
|
||||||
|
@param [in,out] window A vector of size opts.WindowSize(). Note:
|
||||||
|
it will typically be a sub-vector of a larger vector of size
|
||||||
|
opts.PaddedWindowSize(), with the remaining samples zero,
|
||||||
|
as the FFT code is more efficient if it operates on data with
|
||||||
|
power-of-two size.
|
||||||
|
@param [out] log_energy_pre_window If non-NULL, then after dithering and
|
||||||
|
DC offset removal, this function will write to this pointer the log of
|
||||||
|
the total energy (i.e. sum-squared) of the frame.
|
||||||
|
*/
|
||||||
|
void ProcessWindow(const FrameExtractionOptions &opts,
|
||||||
|
const FeatureWindowFunction &window_function, float *window,
|
||||||
|
float *log_energy_pre_window = nullptr);
|
||||||
|
|
||||||
|
// Compute the inner product of two vectors
|
||||||
|
float InnerProduct(const float *a, const float *b, int32_t n);
|
||||||
|
|
||||||
|
} // namespace knf
|
||||||
|
|
||||||
|
#endif // KALDI_NATIVE_FEAT_CSRC_FEATURE_WINDOW_H_
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,143 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
*
|
||||||
|
* See LICENSE for clarification regarding multiple authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Stack trace related stuff is from kaldi.
|
||||||
|
* Refer to
|
||||||
|
* https://github.com/kaldi-asr/kaldi/blob/master/src/base/kaldi-error.cc
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "kaldi-native-fbank/csrc/log.h"
|
||||||
|
|
||||||
|
#ifdef KNF_HAVE_EXECINFO_H
|
||||||
|
#include <execinfo.h> // To get stack trace in error messages.
|
||||||
|
#ifdef KNF_HAVE_CXXABI_H
|
||||||
|
#include <cxxabi.h> // For name demangling.
|
||||||
|
// Useful to decode the stack trace, but only used if we have execinfo.h
|
||||||
|
#endif // KNF_HAVE_CXXABI_H
|
||||||
|
#endif // KNF_HAVE_EXECINFO_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include <ctime>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace knf {
|
||||||
|
|
||||||
|
// Returns the current local time formatted as "yyyy-mm-dd hh:mm:ss".
//
// An empty string is returned if the current time cannot be converted to
// local calendar time (std::localtime() returns a null pointer in that case).
std::string GetDateTimeStr() {
  const std::time_t now = std::time(nullptr);

  // std::localtime() hands back a pointer to storage it owns; copy the
  // broken-down time right away, and never dereference a null result.
  const std::tm *local = std::localtime(&now);
  if (local == nullptr) {
    return std::string();
  }
  const std::tm tm = *local;

  std::ostringstream os;
  os << std::put_time(&tm, "%F %T");  // yyyy-mm-dd hh:mm:ss
  return os.str();
}
|
||||||
|
|
||||||
|
// Locates a (possibly mangled) symbol inside one line of backtrace output.
//
// The symbol is taken to start at the first '_' that directly follows a ' '
// or a '(' and to stop just before the next ' ' or '+'.
//
// @param [in]  trace_name  One line of backtrace text.
// @param [out] begin  Index of the leading '_', or std::string::npos if no
//                     such character exists.
// @param [out] end    Index of the terminating ' ' or '+'. It is written
//                     only when a start position was found.
// @return true if both the start and the end of the symbol were found.
static bool LocateSymbolRange(const std::string &trace_name, std::size_t *begin,
                              std::size_t *end) {
  // Index 0 can never qualify because it has no preceding character.
  std::size_t pos = trace_name.find('_', 1);
  while (pos != std::string::npos) {
    const char prev = trace_name[pos - 1];
    if (prev == ' ' || prev == '(') {
      break;
    }
    pos = trace_name.find('_', pos + 1);
  }

  *begin = pos;
  if (pos == std::string::npos) {
    return false;
  }

  *end = trace_name.find_first_of(" +", pos);
  return *end != std::string::npos;
}
|
||||||
|
|
||||||
|
#ifdef KNF_HAVE_EXECINFO_H
// Replaces the mangled C++ symbol inside one backtrace line with its
// demangled form.
//
// The line formats we try to handle are:
//
// Linux:
//    ./kaldi-error-test(_ZN5kaldi13UnitTestErrorEv+0xb) [0x804965d]
//
// Mac:
//    0 server 0x000000010f67614d _ZNK5kaldi13MessageLogger10LogMessageEv + 813
//
// From such a line we extract the name, e.g. '_ZN5kaldi13UnitTestErrorEv',
// and demangle it into a readable name like kaldi::UnitTestError.
//
// The input is returned unchanged when cxxabi.h is not available or when no
// symbol can be located in the line. If the symbol is found but cannot be
// demangled, the line is rebuilt with the symbol text left as it was.
static std::string Demangle(const std::string &trace_name) {
#ifdef KNF_HAVE_CXXABI_H
  std::size_t sym_begin = 0;
  std::size_t sym_end = 0;
  if (!LocateSymbolRange(trace_name, &sym_begin, &sym_end)) {
    return trace_name;
  }

  std::string readable = trace_name.substr(sym_begin, sym_end - sym_begin);

  int status = 0;
  char *demangled =
      abi::__cxa_demangle(readable.c_str(), nullptr, nullptr, &status);
  if (status == 0 && demangled != nullptr) {
    readable = demangled;
    // The buffer returned by __cxa_demangle() is released with free().
    free(demangled);
  }

  // prefix + (de)mangled symbol + everything from the terminator onwards.
  std::string result = trace_name.substr(0, sym_begin);
  result += readable;
  result += trace_name.substr(sym_end, std::string::npos);
  return result;
#else   // KNF_HAVE_CXXABI_H
  return trace_name;
#endif  // KNF_HAVE_CXXABI_H
}
#endif  // KNF_HAVE_EXECINFO_H
|
||||||
|
|
||||||
|
// Returns a human-readable stack trace of the calling thread.
//
// The result is an empty string when KNF_HAVE_EXECINFO_H is not defined or
// when backtrace_symbols() fails. Otherwise it starts with a
// "[ Stack-Trace: ]" header followed by one (demangled) frame per line.
// If the trace buffer was filled completely, a ".\n.\n.\n" marker is appended
// to signal that deeper frames were probably dropped.
std::string GetStackTrace() {
  std::string ans;
#ifdef KNF_HAVE_EXECINFO_H
  constexpr std::size_t kMaxTraceSize = 50;
  constexpr std::size_t kMaxTracePrint = 50;  // Must be even.
  static_assert(kMaxTracePrint % 2 == 0, "kMaxTracePrint must be even");

  // Buffer for the trace.
  void *trace[kMaxTraceSize];
  // Get the trace.
  const std::size_t size = backtrace(trace, kMaxTraceSize);
  // Get the trace symbols.
  char **trace_symbol = backtrace_symbols(trace, size);
  if (trace_symbol == nullptr) {
    return ans;
  }

  // Appends frames [first, last) to the result, one per line.
  const auto append_frames = [&ans, trace_symbol](std::size_t first,
                                                  std::size_t last) {
    for (std::size_t i = first; i < last; ++i) {
      ans += Demangle(trace_symbol[i]) + "\n";
    }
  };

  // Compose a human-readable backtrace string.
  ans += "[ Stack-Trace: ]\n";
  if (size <= kMaxTracePrint) {
    append_frames(0, size);
  } else {
    // Print only the first and last kMaxTracePrint / 2 frames.
    append_frames(0, kMaxTracePrint / 2);
    ans += ".\n.\n.\n";
    append_frames(size - kMaxTracePrint / 2, size);
  }

  // This check used to live inside the else-branch above, which is
  // unreachable while kMaxTracePrint == kMaxTraceSize, so a full buffer was
  // never reported. A full buffer means the stack was too long, probably a
  // bug.
  if (size == kMaxTraceSize) {
    ans += ".\n.\n.\n";
  }

  // We must free the array of pointers allocated by backtrace_symbols(),
  // but not the strings themselves.
  free(trace_symbol);
#endif  // KNF_HAVE_EXECINFO_H
  return ans;
}
|
||||||
|
|
||||||
|
} // namespace knf
|
@ -0,0 +1,347 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
*
|
||||||
|
* See LICENSE for clarification regarding multiple authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// The content in this file is copied/modified from
|
||||||
|
// https://github.com/k2-fsa/k2/blob/master/k2/csrc/log.h
|
||||||
|
#ifndef KALDI_NATIVE_FBANK_CSRC_LOG_H_
|
||||||
|
#define KALDI_NATIVE_FBANK_CSRC_LOG_H_
|
||||||
|
|
||||||
|
#include <stdio.h>

#include <cstdint>
#include <cstdlib>
#include <mutex>  // NOLINT
#include <sstream>
#include <stdexcept>
#include <string>
|
||||||
|
|
||||||
|
namespace knf {
|
||||||
|
|
||||||
|
// True when the translation unit is compiled with NDEBUG defined. The
// KNF_DCHECK* / KNF_DLOG macros use it to turn themselves into no-ops.
#ifdef NDEBUG
constexpr bool kDisableDebug = true;
#else
constexpr bool kDisableDebug = false;
#endif
|
||||||
|
|
||||||
|
// Severity of a log message, ordered from least to most severe.
enum class LogLevel {
  kTrace = 0,
  kDebug,    // 1
  kInfo,     // 2
  kWarning,  // 3
  kError,    // 4
  kFatal,    // 5, print message and abort the program
};

// Short aliases for the levels above. They are spelled the way they appear
// inside KNF_LOG(xxx), which is why their names do not follow the google c++
// code style.
//
// Usage:
//
//  KNF_LOG(TRACE) << "some message";
//  KNF_LOG(DEBUG) << "some message";
#ifdef _MSC_VER
#define TRACE LogLevel::kTrace
#define DEBUG LogLevel::kDebug
#define INFO LogLevel::kInfo
#define WARNING LogLevel::kWarning
#define ERROR LogLevel::kError
#define FATAL LogLevel::kFatal
#else
constexpr LogLevel TRACE = LogLevel::kTrace;
constexpr LogLevel DEBUG = LogLevel::kDebug;
constexpr LogLevel INFO = LogLevel::kInfo;
constexpr LogLevel WARNING = LogLevel::kWarning;
constexpr LogLevel ERROR = LogLevel::kError;
constexpr LogLevel FATAL = LogLevel::kFatal;
#endif
|
||||||
|
|
||||||
|
std::string GetStackTrace();
|
||||||
|
|
||||||
|
/* Return the current log level.
|
||||||
|
|
||||||
|
|
||||||
|
If the current log level is TRACE, then all logged messages are printed out.
|
||||||
|
|
||||||
|
If the current log level is DEBUG, log messages with "TRACE" level are not
|
||||||
|
shown and all other levels are printed out.
|
||||||
|
|
||||||
|
Similarly, if the current log level is INFO, log message with "TRACE" and
|
||||||
|
"DEBUG" are not shown and all other levels are printed out.
|
||||||
|
|
||||||
|
If it is FATAL, then only FATAL messages are shown.
|
||||||
|
*/
|
||||||
|
inline LogLevel GetCurrentLogLevel() {
|
||||||
|
static LogLevel log_level = INFO;
|
||||||
|
static std::once_flag init_flag;
|
||||||
|
std::call_once(init_flag, []() {
|
||||||
|
const char *env_log_level = std::getenv("KNF_LOG_LEVEL");
|
||||||
|
if (env_log_level == nullptr) return;
|
||||||
|
|
||||||
|
std::string s = env_log_level;
|
||||||
|
if (s == "TRACE")
|
||||||
|
log_level = TRACE;
|
||||||
|
else if (s == "DEBUG")
|
||||||
|
log_level = DEBUG;
|
||||||
|
else if (s == "INFO")
|
||||||
|
log_level = INFO;
|
||||||
|
else if (s == "WARNING")
|
||||||
|
log_level = WARNING;
|
||||||
|
else if (s == "ERROR")
|
||||||
|
log_level = ERROR;
|
||||||
|
else if (s == "FATAL")
|
||||||
|
log_level = FATAL;
|
||||||
|
else
|
||||||
|
fprintf(stderr,
|
||||||
|
"Unknown KNF_LOG_LEVEL: %s"
|
||||||
|
"\nSupported values are: "
|
||||||
|
"TRACE, DEBUG, INFO, WARNING, ERROR, FATAL",
|
||||||
|
s.c_str());
|
||||||
|
});
|
||||||
|
return log_level;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true if the environment variable KNF_ABORT is set (to any value).
//
// The environment is inspected only once, on the first call; the answer is
// cached for the rest of the process lifetime.
inline bool EnableAbort() {
  // A function-local static is initialised exactly once, on first use.
  static const bool enable_abort = (std::getenv("KNF_ABORT") != nullptr);
  return enable_abort;
}
|
||||||
|
|
||||||
|
class Logger {
|
||||||
|
public:
|
||||||
|
Logger(const char *filename, const char *func_name, uint32_t line_num,
|
||||||
|
LogLevel level)
|
||||||
|
: filename_(filename),
|
||||||
|
func_name_(func_name),
|
||||||
|
line_num_(line_num),
|
||||||
|
level_(level) {
|
||||||
|
cur_level_ = GetCurrentLogLevel();
|
||||||
|
fprintf(stderr, "here\n");
|
||||||
|
switch (level) {
|
||||||
|
case TRACE:
|
||||||
|
if (cur_level_ <= TRACE) fprintf(stderr, "[T] ");
|
||||||
|
break;
|
||||||
|
case DEBUG:
|
||||||
|
if (cur_level_ <= DEBUG) fprintf(stderr, "[D] ");
|
||||||
|
break;
|
||||||
|
case INFO:
|
||||||
|
if (cur_level_ <= INFO) fprintf(stderr, "[I] ");
|
||||||
|
break;
|
||||||
|
case WARNING:
|
||||||
|
if (cur_level_ <= WARNING) fprintf(stderr, "[W] ");
|
||||||
|
break;
|
||||||
|
case ERROR:
|
||||||
|
if (cur_level_ <= ERROR) fprintf(stderr, "[E] ");
|
||||||
|
break;
|
||||||
|
case FATAL:
|
||||||
|
if (cur_level_ <= FATAL) fprintf(stderr, "[F] ");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cur_level_ <= level_) {
|
||||||
|
fprintf(stderr, "%s:%u:%s ", filename, line_num, func_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
~Logger() noexcept(false) {
|
||||||
|
static constexpr const char *kErrMsg = R"(
|
||||||
|
Some bad things happened. Please read the above error messages and stack
|
||||||
|
trace. If you are using Python, the following command may be helpful:
|
||||||
|
|
||||||
|
gdb --args python /path/to/your/code.py
|
||||||
|
|
||||||
|
(You can use `gdb` to debug the code. Please consider compiling
|
||||||
|
a debug version of KNF.).
|
||||||
|
|
||||||
|
If you are unable to fix it, please open an issue at:
|
||||||
|
|
||||||
|
https://github.com/csukuangfj/kaldi-native-fbank/issues/new
|
||||||
|
)";
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
if (level_ == FATAL) {
|
||||||
|
std::string stack_trace = GetStackTrace();
|
||||||
|
if (!stack_trace.empty()) {
|
||||||
|
fprintf(stderr, "\n\n%s\n", stack_trace.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
fflush(nullptr);
|
||||||
|
|
||||||
|
#ifndef __ANDROID_API__
|
||||||
|
if (EnableAbort()) {
|
||||||
|
// NOTE: abort() will terminate the program immediately without
|
||||||
|
// printing the Python stack backtrace.
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
throw std::runtime_error(kErrMsg);
|
||||||
|
#else
|
||||||
|
abort();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const Logger &operator<<(bool b) const {
|
||||||
|
if (cur_level_ <= level_) {
|
||||||
|
fprintf(stderr, b ? "true" : "false");
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Logger &operator<<(int8_t i) const {
|
||||||
|
if (cur_level_ <= level_) fprintf(stderr, "%d", i);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Logger &operator<<(const char *s) const {
|
||||||
|
if (cur_level_ <= level_) fprintf(stderr, "%s", s);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Logger &operator<<(int32_t i) const {
|
||||||
|
if (cur_level_ <= level_) fprintf(stderr, "%d", i);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Logger &operator<<(uint32_t i) const {
|
||||||
|
if (cur_level_ <= level_) fprintf(stderr, "%u", i);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Logger &operator<<(uint64_t i) const {
|
||||||
|
if (cur_level_ <= level_)
|
||||||
|
fprintf(stderr, "%llu", (long long unsigned int)i); // NOLINT
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Logger &operator<<(int64_t i) const {
|
||||||
|
if (cur_level_ <= level_)
|
||||||
|
fprintf(stderr, "%lli", (long long int)i); // NOLINT
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Logger &operator<<(float f) const {
|
||||||
|
if (cur_level_ <= level_) fprintf(stderr, "%f", f);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Logger &operator<<(double d) const {
|
||||||
|
if (cur_level_ <= level_) fprintf(stderr, "%f", d);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
const Logger &operator<<(const T &t) const {
|
||||||
|
// require T overloads operator<<
|
||||||
|
std::ostringstream os;
|
||||||
|
os << t;
|
||||||
|
return *this << os.str().c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
// specialization to fix compile error: `stringstream << nullptr` is ambiguous
|
||||||
|
const Logger &operator<<(const std::nullptr_t &null) const {
|
||||||
|
if (cur_level_ <= level_) *this << "(null)";
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const char *filename_;
|
||||||
|
const char *func_name_;
|
||||||
|
uint32_t line_num_;
|
||||||
|
LogLevel level_;
|
||||||
|
LogLevel cur_level_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class Voidifier {
|
||||||
|
public:
|
||||||
|
void operator&(const Logger &)const {}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace knf
|
||||||
|
|
||||||
|
#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \
|
||||||
|
defined(__PRETTY_FUNCTION__)
|
||||||
|
// for clang and GCC
|
||||||
|
#define KNF_FUNC __PRETTY_FUNCTION__
|
||||||
|
#else
|
||||||
|
// for other compilers
|
||||||
|
#define KNF_FUNC __func__
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define KNF_STATIC_ASSERT(x) static_assert(x, "")
|
||||||
|
|
||||||
|
#define KNF_CHECK(x) \
|
||||||
|
(x) ? (void)0 \
|
||||||
|
: ::knf::Voidifier() & \
|
||||||
|
::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::FATAL) \
|
||||||
|
<< "Check failed: " << #x << " "
|
||||||
|
|
||||||
|
// WARNING: x and y may be evaluated multiple times, but this happens only
|
||||||
|
// when the check fails. Since the program aborts if it fails, we don't think
|
||||||
|
// the extra evaluation of x and y matters.
|
||||||
|
//
|
||||||
|
// CAUTION: we recommend the following use case:
|
||||||
|
//
|
||||||
|
// auto x = Foo();
|
||||||
|
// auto y = Bar();
|
||||||
|
// KNF_CHECK_EQ(x, y) << "Some message";
|
||||||
|
//
|
||||||
|
// And please avoid
|
||||||
|
//
|
||||||
|
// KNF_CHECK_EQ(Foo(), Bar());
|
||||||
|
//
|
||||||
|
// if `Foo()` or `Bar()` causes some side effects, e.g., changing some
|
||||||
|
// local static variables or global variables.
|
||||||
|
#define _KNF_CHECK_OP(x, y, op) \
|
||||||
|
((x)op(y)) ? (void)0 \
|
||||||
|
: ::knf::Voidifier() & \
|
||||||
|
::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::FATAL) \
|
||||||
|
<< "Check failed: " << #x << " " << #op << " " << #y \
|
||||||
|
<< " (" << (x) << " vs. " << (y) << ") "
|
||||||
|
|
||||||
|
#define KNF_CHECK_EQ(x, y) _KNF_CHECK_OP(x, y, ==)
|
||||||
|
#define KNF_CHECK_NE(x, y) _KNF_CHECK_OP(x, y, !=)
|
||||||
|
#define KNF_CHECK_LT(x, y) _KNF_CHECK_OP(x, y, <)
|
||||||
|
#define KNF_CHECK_LE(x, y) _KNF_CHECK_OP(x, y, <=)
|
||||||
|
#define KNF_CHECK_GT(x, y) _KNF_CHECK_OP(x, y, >)
|
||||||
|
#define KNF_CHECK_GE(x, y) _KNF_CHECK_OP(x, y, >=)
|
||||||
|
|
||||||
|
#define KNF_LOG(x) ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::x)
|
||||||
|
|
||||||
|
// ------------------------------------------------------------
|
||||||
|
// For debug check
|
||||||
|
// ------------------------------------------------------------
|
||||||
|
// If you define the macro "-D NDEBUG" while compiling kaldi-native-fbank,
|
||||||
|
// the following macros are in fact empty and do nothing.
|
||||||
|
|
||||||
|
#define KNF_DCHECK(x) ::knf::kDisableDebug ? (void)0 : KNF_CHECK(x)
|
||||||
|
|
||||||
|
#define KNF_DCHECK_EQ(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_EQ(x, y)
|
||||||
|
|
||||||
|
#define KNF_DCHECK_NE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_NE(x, y)
|
||||||
|
|
||||||
|
#define KNF_DCHECK_LT(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_LT(x, y)
|
||||||
|
|
||||||
|
#define KNF_DCHECK_LE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_LE(x, y)
|
||||||
|
|
||||||
|
#define KNF_DCHECK_GT(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_GT(x, y)
|
||||||
|
|
||||||
|
#define KNF_DCHECK_GE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_GE(x, y)
|
||||||
|
|
||||||
|
#define KNF_DLOG(x) \
|
||||||
|
::knf::kDisableDebug ? (void)0 : ::knf::Voidifier() & KNF_LOG(x)
|
||||||
|
|
||||||
|
#endif // KALDI_NATIVE_FBANK_CSRC_LOG_H_
|
@ -0,0 +1,256 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
*
|
||||||
|
* See LICENSE for clarification regarding multiple authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// This file is copied/modified from kaldi/src/feat/mel-computations.cc
|
||||||
|
|
||||||
|
#include "kaldi-native-fbank/csrc/mel-computations.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
#include "kaldi-native-fbank/csrc/feature-window.h"
|
||||||
|
|
||||||
|
namespace knf {
|
||||||
|
|
||||||
|
// Streams the textual form of `opts`, as produced by opts.ToString(), into
// `os` and returns `os`.
std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts) {
  return os << opts.ToString();
}
|
||||||
|
|
||||||
|
float MelBanks::VtlnWarpFreq(
|
||||||
|
float vtln_low_cutoff, // upper+lower frequency cutoffs for VTLN.
|
||||||
|
float vtln_high_cutoff,
|
||||||
|
float low_freq, // upper+lower frequency cutoffs in mel computation
|
||||||
|
float high_freq, float vtln_warp_factor, float freq) {
|
||||||
|
/// This computes a VTLN warping function that is not the same as HTK's one,
|
||||||
|
/// but has similar inputs (this function has the advantage of never producing
|
||||||
|
/// empty bins).
|
||||||
|
|
||||||
|
/// This function computes a warp function F(freq), defined between low_freq
|
||||||
|
/// and high_freq inclusive, with the following properties:
|
||||||
|
/// F(low_freq) == low_freq
|
||||||
|
/// F(high_freq) == high_freq
|
||||||
|
/// The function is continuous and piecewise linear with two inflection
|
||||||
|
/// points.
|
||||||
|
/// The lower inflection point (measured in terms of the unwarped
|
||||||
|
/// frequency) is at frequency l, determined as described below.
|
||||||
|
/// The higher inflection point is at a frequency h, determined as
|
||||||
|
/// described below.
|
||||||
|
/// If l <= f <= h, then F(f) = f/vtln_warp_factor.
|
||||||
|
/// If the higher inflection point (measured in terms of the unwarped
|
||||||
|
/// frequency) is at h, then max(h, F(h)) == vtln_high_cutoff.
|
||||||
|
/// Since (by the last point) F(h) == h/vtln_warp_factor, then
|
||||||
|
/// max(h, h/vtln_warp_factor) == vtln_high_cutoff, so
|
||||||
|
/// h = vtln_high_cutoff / max(1, 1/vtln_warp_factor).
|
||||||
|
/// = vtln_high_cutoff * min(1, vtln_warp_factor).
|
||||||
|
/// If the lower inflection point (measured in terms of the unwarped
|
||||||
|
/// frequency) is at l, then min(l, F(l)) == vtln_low_cutoff
|
||||||
|
/// This implies that l = vtln_low_cutoff / min(1, 1/vtln_warp_factor)
|
||||||
|
/// = vtln_low_cutoff * max(1, vtln_warp_factor)
|
||||||
|
|
||||||
|
if (freq < low_freq || freq > high_freq)
|
||||||
|
return freq; // in case this gets called
|
||||||
|
// for out-of-range frequencies, just return the freq.
|
||||||
|
|
||||||
|
KNF_CHECK_GT(vtln_low_cutoff, low_freq);
|
||||||
|
KNF_CHECK_LT(vtln_high_cutoff, high_freq);
|
||||||
|
|
||||||
|
float one = 1.0f;
|
||||||
|
float l = vtln_low_cutoff * std::max(one, vtln_warp_factor);
|
||||||
|
float h = vtln_high_cutoff * std::min(one, vtln_warp_factor);
|
||||||
|
float scale = 1.0f / vtln_warp_factor;
|
||||||
|
float Fl = scale * l; // F(l);
|
||||||
|
float Fh = scale * h; // F(h);
|
||||||
|
KNF_CHECK(l > low_freq && h < high_freq);
|
||||||
|
// slope of left part of the 3-piece linear function
|
||||||
|
float scale_left = (Fl - low_freq) / (l - low_freq);
|
||||||
|
// [slope of center part is just "scale"]
|
||||||
|
|
||||||
|
// slope of right part of the 3-piece linear function
|
||||||
|
float scale_right = (high_freq - Fh) / (high_freq - h);
|
||||||
|
|
||||||
|
if (freq < l) {
|
||||||
|
return low_freq + scale_left * (freq - low_freq);
|
||||||
|
} else if (freq < h) {
|
||||||
|
return scale * freq;
|
||||||
|
} else { // freq >= h
|
||||||
|
return high_freq + scale_right * (freq - high_freq);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
float MelBanks::VtlnWarpMelFreq(
|
||||||
|
float vtln_low_cutoff, // upper+lower frequency cutoffs for VTLN.
|
||||||
|
float vtln_high_cutoff,
|
||||||
|
float low_freq, // upper+lower frequency cutoffs in mel computation
|
||||||
|
float high_freq, float vtln_warp_factor, float mel_freq) {
|
||||||
|
return MelScale(VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, low_freq,
|
||||||
|
high_freq, vtln_warp_factor,
|
||||||
|
InverseMelScale(mel_freq)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the triangular mel filter bank.
//
// For each of the opts.num_bins bins this stores in bins_ the index of the
// first FFT bin with a nonzero weight together with the run of weights up to
// the last nonzero one, and stores the bin's center frequency in
// center_freqs_.
//
// @param opts              Mel bank options (number of bins, frequency
//                          cutoffs, VTLN cutoffs, debug and HTK flags).
// @param frame_opts        Supplies the sampling frequency and the padded
//                          window size.
// @param vtln_warp_factor  VTLN warp factor; warping is applied only when it
//                          differs from 1.0.
//
// Invalid option values are reported through KNF_LOG(FATAL) / KNF_CHECK.
MelBanks::MelBanks(const MelBanksOptions &opts,
                   const FrameExtractionOptions &frame_opts,
                   float vtln_warp_factor)
    : htk_mode_(opts.htk_mode) {
  const int32_t num_bins = opts.num_bins;
  if (num_bins < 3) {
    KNF_LOG(FATAL) << "Must have at least 3 mel bins";
  }

  const float sample_freq = frame_opts.samp_freq;
  const int32_t window_length_padded = frame_opts.PaddedWindowSize();
  KNF_CHECK_EQ(window_length_padded % 2, 0);

  const int32_t num_fft_bins = window_length_padded / 2;
  const float nyquist = 0.5f * sample_freq;

  // A non-positive opts.high_freq is an offset relative to the Nyquist
  // frequency.
  const float low_freq = opts.low_freq;
  const float high_freq =
      (opts.high_freq > 0.0f) ? opts.high_freq : nyquist + opts.high_freq;

  const bool bad_freq_range = low_freq < 0.0f || low_freq >= nyquist ||
                              high_freq <= 0.0f || high_freq > nyquist ||
                              high_freq <= low_freq;
  if (bad_freq_range) {
    KNF_LOG(FATAL) << "Bad values in options: low-freq " << low_freq
                   << " and high-freq " << high_freq << " vs. nyquist "
                   << nyquist;
  }

  // fft-bin width [think of it as Nyquist-freq / half-window-length]
  const float fft_bin_width = sample_freq / window_length_padded;

  const float mel_low_freq = MelScale(low_freq);
  const float mel_high_freq = MelScale(high_freq);

  debug_ = opts.debug_mel;

  // divide by num_bins+1 in next line because of end-effects where the bins
  // spread out to the sides.
  const float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1);

  // A negative opts.vtln_high is an offset relative to the Nyquist frequency.
  const float vtln_low = opts.vtln_low;
  float vtln_high = opts.vtln_high;
  if (vtln_high < 0.0f) {
    vtln_high += nyquist;
  }

  const bool apply_vtln = (vtln_warp_factor != 1.0f);
  if (apply_vtln &&
      (vtln_low < 0.0f || vtln_low <= low_freq || vtln_low >= high_freq ||
       vtln_high <= 0.0f || vtln_high >= high_freq || vtln_high <= vtln_low)) {
    KNF_LOG(FATAL) << "Bad values in options: vtln-low " << vtln_low
                   << " and vtln-high " << vtln_high << ", versus "
                   << "low-freq " << low_freq << " and high-freq " << high_freq;
  }

  // Warps one mel frequency with the VTLN settings established above.
  const auto warp = [&](float mel) {
    return VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
                           vtln_warp_factor, mel);
  };

  bins_.resize(num_bins);
  center_freqs_.resize(num_bins);

  for (int32_t bin = 0; bin < num_bins; ++bin) {
    // Corners of the triangle, on the mel scale.
    float left_mel = mel_low_freq + bin * mel_freq_delta;
    float center_mel = mel_low_freq + (bin + 1) * mel_freq_delta;
    float right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;

    if (apply_vtln) {
      left_mel = warp(left_mel);
      center_mel = warp(center_mel);
      right_mel = warp(right_mel);
    }
    center_freqs_[bin] = InverseMelScale(center_mel);

    // this_bin will be a vector of coefficients that is only
    // nonzero where this mel bin is active.
    std::vector<float> this_bin(num_fft_bins);

    int32_t first_index = -1, last_index = -1;
    for (int32_t i = 0; i < num_fft_bins; ++i) {
      // Center frequency of this fft bin, converted to the mel scale.
      const float mel = MelScale(fft_bin_width * i);
      if (!(mel > left_mel && mel < right_mel)) {
        continue;  // outside the support of this triangle
      }

      // Rising edge up to the center, falling edge after it.
      const float weight =
          (mel <= center_mel)
              ? (mel - left_mel) / (center_mel - left_mel)
              : (right_mel - mel) / (right_mel - center_mel);
      this_bin[i] = weight;

      if (first_index == -1) {
        first_index = i;
      }
      last_index = i;
    }
    KNF_CHECK(first_index != -1 && last_index >= first_index &&
              "You may have set num_mel_bins too large.");

    // Keep only the weights in [first_index, last_index].
    auto &entry = bins_[bin];
    entry.first = first_index;
    entry.second.insert(entry.second.end(), this_bin.begin() + first_index,
                        this_bin.begin() + last_index + 1);

    // Replicate a bug in HTK, for testing purposes.
    if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f) {
      entry.second[0] = 0.0;
    }
  }  // for (int32_t bin = 0; bin < num_bins; ++bin) {

  if (!debug_) {
    return;
  }

  // Dump every bin (offset and weights) in a single log record.
  std::ostringstream os;
  for (size_t i = 0; i < bins_.size(); i++) {
    os << "bin " << i << ", offset = " << bins_[i].first << ", vec = ";
    for (auto k : bins_[i].second) {
      os << k << ", ";
    }
    os << "\n";
  }
  KNF_LOG(INFO) << os.str();
}
|
||||||
|
|
||||||
|
// "power_spectrum" contains fft energies.
|
||||||
|
void MelBanks::Compute(const float *power_spectrum,
|
||||||
|
float *mel_energies_out) const {
|
||||||
|
int32_t num_bins = bins_.size();
|
||||||
|
|
||||||
|
for (int32_t i = 0; i < num_bins; i++) {
|
||||||
|
int32_t offset = bins_[i].first;
|
||||||
|
const auto &v = bins_[i].second;
|
||||||
|
float energy = 0;
|
||||||
|
for (int32_t k = 0; k != v.size(); ++k) {
|
||||||
|
energy += v[k] * power_spectrum[k + offset];
|
||||||
|
}
|
||||||
|
|
||||||
|
// HTK-like flooring- for testing purposes (we prefer dither)
|
||||||
|
if (htk_mode_ && energy < 1.0) {
|
||||||
|
energy = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
mel_energies_out[i] = energy;
|
||||||
|
|
||||||
|
// The following assert was added due to a problem with OpenBlas that
|
||||||
|
// we had at one point (it was a bug in that library). Just to detect
|
||||||
|
// it early.
|
||||||
|
KNF_CHECK_EQ(energy, energy); // check that energy is not nan
|
||||||
|
}
|
||||||
|
|
||||||
|
if (debug_) {
|
||||||
|
fprintf(stderr, "MEL BANKS:\n");
|
||||||
|
for (int32_t i = 0; i < num_bins; i++)
|
||||||
|
fprintf(stderr, " %f", mel_energies_out[i]);
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace knf
|
@ -0,0 +1,115 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
*
|
||||||
|
* See LICENSE for clarification regarding multiple authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
// This file is copied/modified from kaldi/src/feat/mel-computations.h
|
||||||
|
#ifndef KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
|
||||||
|
#define KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "kaldi-native-fbank/csrc/feature-window.h"
|
||||||
|
|
||||||
|
namespace knf {
|
||||||
|
|
||||||
|
// Options that configure the mel filter bank.
struct MelBanksOptions {
  // Number of triangular bins, e.g. 25.
  int32_t num_bins = 25;

  // Lower frequency cutoff, e.g. 20.
  float low_freq = 20;

  // Upper frequency cutoff; 0 -> no cutoff, negative -> added to the
  // Nyquist frequency to get the cutoff.
  float high_freq = 0;

  // vtln lower cutoff of warping function.
  float vtln_low = 100;

  // vtln upper cutoff of warping function: if negative, added to the
  // Nyquist frequency to get the cutoff.
  float vtln_high = -500;

  bool debug_mel = false;

  // htk_mode is a "hidden" config, it does not show up on command line.
  // Enables more exact compatibility with HTK, for testing purposes.
  // Affects mel-energy flooring and reproduces a bug in HTK.
  bool htk_mode = false;

  // Returns every field as a "name: value" line, in declaration order.
  // The two bool fields are streamed with the default formatting.
  std::string ToString() const {
    std::ostringstream stream;
    stream << "num_bins: " << num_bins << "\n"
           << "low_freq: " << low_freq << "\n"
           << "high_freq: " << high_freq << "\n"
           << "vtln_low: " << vtln_low << "\n"
           << "vtln_high: " << vtln_high << "\n"
           << "debug_mel: " << debug_mel << "\n"
           << "htk_mode: " << htk_mode << "\n";
    return stream.str();
  }
};
|
||||||
|
|
||||||
|
std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts);
|
||||||
|
|
||||||
|
class MelBanks {
|
||||||
|
public:
|
||||||
|
static inline float InverseMelScale(float mel_freq) {
|
||||||
|
return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline float MelScale(float freq) {
|
||||||
|
return 1127.0f * logf(1.0f + freq / 700.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float VtlnWarpFreq(
|
||||||
|
float vtln_low_cutoff,
|
||||||
|
float vtln_high_cutoff, // discontinuities in warp func
|
||||||
|
float low_freq,
|
||||||
|
float high_freq, // upper+lower frequency cutoffs in
|
||||||
|
// the mel computation
|
||||||
|
float vtln_warp_factor, float freq);
|
||||||
|
|
||||||
|
static float VtlnWarpMelFreq(float vtln_low_cutoff, float vtln_high_cutoff,
|
||||||
|
float low_freq, float high_freq,
|
||||||
|
float vtln_warp_factor, float mel_freq);
|
||||||
|
|
||||||
|
// TODO(fangjun): Remove vtln_warp_factor
|
||||||
|
MelBanks(const MelBanksOptions &opts,
|
||||||
|
const FrameExtractionOptions &frame_opts, float vtln_warp_factor);
|
||||||
|
|
||||||
|
/// Compute Mel energies (note: not log energies).
|
||||||
|
/// At input, "fft_energies" contains the FFT energies (not log).
|
||||||
|
///
|
||||||
|
/// @param fft_energies 1-D array of size num_fft_bins/2+1
|
||||||
|
/// @param mel_energies_out 1-D array of size num_mel_bins
|
||||||
|
void Compute(const float *fft_energies, float *mel_energies_out) const;
|
||||||
|
|
||||||
|
int32_t NumBins() const { return bins_.size(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
// center frequencies of bins, numbered from 0 ... num_bins-1.
|
||||||
|
// Needed by GetCenterFreqs().
|
||||||
|
std::vector<float> center_freqs_;
|
||||||
|
|
||||||
|
// the "bins_" vector is a vector, one for each bin, of a pair:
|
||||||
|
// (the first nonzero fft-bin), (the vector of weights).
|
||||||
|
std::vector<std::pair<int32_t, std::vector<float>>> bins_;
|
||||||
|
|
||||||
|
// TODO(fangjun): Remove debug_ and htk_mode_
|
||||||
|
bool debug_;
|
||||||
|
bool htk_mode_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace knf
|
||||||
|
|
||||||
|
#endif // KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
|
@ -0,0 +1,66 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
*
|
||||||
|
* See LICENSE for clarification regarding multiple authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "kaldi-native-fbank/csrc/rfft.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "kaldi-native-fbank/csrc/log.h"
|
||||||
|
|
||||||
|
// see fftsg.c
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);
|
||||||
|
#else
|
||||||
|
void rdft(int n, int isgn, double *a, int *ip, double *w);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace knf {
|
||||||
|
// Private implementation of Rfft: wraps rdft() from fftsg.c and owns the two
// work areas (ip_, w_) that rdft() is handed on every call.
class Rfft::RfftImpl {
 public:
  // @param n Number of points of the transform. It must be a power of 2 and
  //          at least 2. n is checked before any work area is sized from
  //          it, so a bad n is reported before anything is allocated.
  explicit RfftImpl(int32_t n)
      : n_(ValidateSize(n)),
        // Both work areas start zero-filled. NOTE(review): the sizes
        // 2 + sqrt(n/2) and n/2 are presumably the minimums documented in
        // fftsg.c -- confirm against that file.
        ip_(2 + static_cast<size_t>(std::sqrt(n_ / 2))),
        w_(n_ / 2) {}

  // Single-precision entry point: widens the n_ input values to double,
  // transforms them, and writes the results back into in_out, narrowing
  // them to float again.
  void Compute(float *in_out) {
    std::vector<double> buffer(in_out, in_out + n_);

    Compute(buffer.data());

    std::copy(buffer.begin(), buffer.end(), in_out);
  }

  // Transforms the n_ values of in_out in place.
  void Compute(double *in_out) {
    // 1 means forward fft
    rdft(n_, 1, in_out, ip_.data(), w_.data());
  }

 private:
  // Returns n unchanged after checking that it is a power of 2 and >= 2.
  // The `n >= 2` check comes first so `n - 1` is never evaluated for values
  // the bit test was not written for (0, 1 and negative numbers all used to
  // slip past `n & (n - 1) == 0` or reach it with a nonsensical size).
  static int32_t ValidateSize(int32_t n) {
    KNF_CHECK_EQ(n >= 2, true);
    KNF_CHECK_EQ(n & (n - 1), 0);
    return n;
  }

  int32_t n_;                // number of points; declared first so it is
                             // initialized before ip_ and w_ read it
  std::vector<int32_t> ip_;  // integer work area handed to rdft()
  std::vector<double> w_;    // floating-point work area handed to rdft()
};
|
||||||
|
|
||||||
|
Rfft::Rfft(int32_t n) : impl_(std::make_unique<RfftImpl>(n)) {}
|
||||||
|
|
||||||
|
Rfft::~Rfft() = default;
|
||||||
|
|
||||||
|
void Rfft::Compute(float *in_out) { impl_->Compute(in_out); }
|
||||||
|
void Rfft::Compute(double *in_out) { impl_->Compute(in_out); }
|
||||||
|
|
||||||
|
} // namespace knf
|
@ -0,0 +1,56 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
*
|
||||||
|
* See LICENSE for clarification regarding multiple authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KALDI_NATIVE_FBANK_CSRC_RFFT_H_
|
||||||
|
#define KALDI_NATIVE_FBANK_CSRC_RFFT_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
namespace knf {
|
||||||
|
|
||||||
|
// n-point Real discrete Fourier transform
|
||||||
|
// where n is a power of 2. n >= 2
|
||||||
|
//
|
||||||
|
// R[k] = sum_j=0^n-1 in[j]*cos(2*pi*j*k/n), 0<=k<=n/2
|
||||||
|
// I[k] = sum_j=0^n-1 in[j]*sin(2*pi*j*k/n), 0<k<n/2
|
||||||
|
// Public handle for the transform; the implementation lives in the
// forward-declared RfftImpl, which this class owns through impl_.
class Rfft {
|
||||||
|
public:
|
||||||
|
// @param n Number of fft bins. It must be a power of 2, n >= 2.
|
||||||
|
explicit Rfft(int32_t n);
|
||||||
|
// Only declared here; the definition is out of line.
~Rfft();
|
||||||
|
|
||||||
|
/** Computes the transform in place.
 *
 * @param in_out A 1-D array of size n.
 * On return:
 *   in_out[0] = R[0]
 *   in_out[1] = R[n/2]
 *   for 0 < k < n/2,
 *     in_out[2*k] = R[k]
 *     in_out[2*k+1] = I[k]
 */
|
||||||
|
void Compute(float *in_out);
|
||||||
|
// Same contract as the float overload, for double data.
void Compute(double *in_out);
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Only forward-declared here; the definition is not in this header.
class RfftImpl;
|
||||||
|
// Sole owner of the implementation object.
std::unique_ptr<RfftImpl> impl_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace knf
|
||||||
|
|
||||||
|
#endif // KALDI_NATIVE_FBANK_CSRC_RFFT_H_
|
@ -1,111 +0,0 @@
|
|||||||
# checkout the thirdparty/kaldi/base/kaldi-types.h
# Kaldi is compiled without OpenFst for everything declared in this directory.
add_definitions("-DCOMPILE_WITHOUT_OPENFST")

# Copy the kaldi sources over from speechx; skipped as soon as base/ exists
# next to this file.
if(NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/base)
  foreach(kaldi_subdir base feat matrix util)
    file(COPY ../../../../speechx/speechx/kaldi/${kaldi_subdir}
         DESTINATION ${CMAKE_CURRENT_LIST_DIR})
  endforeach()
endif()

# ---- static libraries -------------------------------------------------------

add_library(kaldi-base STATIC
  base/io-funcs.cc
  base/kaldi-error.cc
  base/kaldi-math.cc
  base/kaldi-utils.cc
  base/timer.cc
)

add_library(kaldi-matrix STATIC
  matrix/compressed-matrix.cc
  matrix/matrix-functions.cc
  matrix/kaldi-matrix.cc
  matrix/kaldi-vector.cc
  matrix/optimization.cc
  matrix/packed-matrix.cc
  matrix/qr.cc
  matrix/sparse-matrix.cc
  matrix/sp-matrix.cc
  matrix/srfft.cc
  matrix/tp-matrix.cc
)

add_library(kaldi-util STATIC
  util/kaldi-holder.cc
  util/kaldi-io.cc
  util/kaldi-semaphore.cc
  util/kaldi-table.cc
  util/kaldi-thread.cc
  util/parse-options.cc
  util/simple-io-funcs.cc
  util/simple-options.cc
  util/text-utils.cc
)

add_library(kaldi-feat-common STATIC
  feat/cmvn.cc
  feat/feature-functions.cc
  feat/feature-window.cc
  feat/mel-computations.cc
  feat/pitch-functions.cc
  feat/resample.cc
  feat/signal.cc
  feat/wave-reader.cc
)

add_library(kaldi-mfcc STATIC
  feat/feature-mfcc.cc
)

add_library(kaldi-fbank STATIC
  feat/feature-fbank.cc
)

# Every library exposes this directory as a public include path.
foreach(kaldi_target
        kaldi-base kaldi-matrix kaldi-util
        kaldi-feat-common kaldi-mfcc kaldi-fbank)
  target_include_directories(${kaldi_target} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
endforeach()

# ---- dependencies between the libraries --------------------------------------

# The openblas target is named differently under MSVC.
if(MSVC)
  target_link_libraries(kaldi-matrix PUBLIC kaldi-base openblas)
else()
  target_link_libraries(kaldi-matrix PUBLIC kaldi-base libopenblas)
endif()

target_link_libraries(kaldi-util PUBLIC kaldi-base kaldi-matrix)
target_link_libraries(kaldi-feat-common PUBLIC kaldi-base kaldi-matrix kaldi-util)
target_link_libraries(kaldi-mfcc PUBLIC kaldi-feat-common)
target_link_libraries(kaldi-fbank PUBLIC kaldi-feat-common)

# ---- libkaldi: one interface target bundling all of the above ----------------

# Paths of the static archives in the build directory.
set(KALDI_LIBRARIES
  ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-base.a
  ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-matrix.a
  ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-util.a
  ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-feat-common.a
  ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-mfcc.a
  ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-fbank.a
)

add_library(libkaldi INTERFACE)
add_dependencies(libkaldi
  kaldi-base kaldi-matrix kaldi-util kaldi-feat-common kaldi-mfcc kaldi-fbank)
target_include_directories(libkaldi INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})

if(APPLE)
  # Archive paths plus openblas and the gfortran runtime libraries.
  target_link_libraries(libkaldi INTERFACE
    ${KALDI_LIBRARIES}
    libopenblas
    ${GFORTRAN_LIBRARIES_DIR}/libgfortran.a
    ${GFORTRAN_LIBRARIES_DIR}/libquadmath.a
    ${GFORTRAN_LIBRARIES_DIR}/libgcc_s.1.1.dylib)
elseif(MSVC)
  # Link the targets themselves instead of the .a paths.
  target_link_libraries(libkaldi INTERFACE
    kaldi-base kaldi-matrix kaldi-util kaldi-feat-common kaldi-mfcc kaldi-fbank
    openblas)
else()
  # Whole-archive link of the static libraries inside one link group.
  target_link_libraries(libkaldi INTERFACE
    -Wl,--start-group -Wl,--whole-archive
    ${KALDI_LIBRARIES} libopenblas.a gfortran
    -Wl,--no-whole-archive -Wl,--end-group)
endif()

target_compile_definitions(libkaldi INTERFACE "-DCOMPILE_WITHOUT_OPENFST")
|
|
Loading…
Reference in new issue