opt to compile asr,cls,vad; add vad; format code ()

pull/2993/head
Hui Zhang 2 years ago committed by GitHub
parent 78e29c8ec4
commit b35fc01a3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,3 +1,6 @@
engine/common/base/flags.h
engine/common/base/log.h
tools/valgrind* tools/valgrind*
*log *log
fc_patch/* fc_patch/*

@ -20,8 +20,7 @@ project(paddlespeech VERSION 0.1)
set(CMAKE_VERBOSE_MAKEFILE on) set(CMAKE_VERBOSE_MAKEFILE on)
# set std-14
set(CMAKE_CXX_STANDARD 14)
include(FetchContent) include(FetchContent)
include(ExternalProject) include(ExternalProject)
@ -31,15 +30,28 @@ set(FETCHCONTENT_QUIET off)
get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}") get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_patch}) set(FETCHCONTENT_BASE_DIR ${fc_patch})
set(CMAKE_CXX_FLAGS)
set(CMAKE_CXX_FLAGS_DEBUG)
set(CMAKE_CXX_FLAGS_RELEASE)
# set std-14
set(CMAKE_CXX_STANDARD 14)
# compiler option # compiler option
# Keep the same with openfst, -fPIC or -fpic # Keep the same with openfst, -fPIC or -fpic
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ldl") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ldl")
SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ggdb") SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ggdb")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall") SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall")
add_compile_options(-fPIC)
############################################################################### ###############################################################################
# Option Configurations # Option Configurations
############################################################################### ###############################################################################
option(WITH_ASR "build asr" ON)
option(WITH_CLS "build cls" ON)
option(WITH_VAD "build vad" ON)
option(TEST_DEBUG "option for debug" OFF) option(TEST_DEBUG "option for debug" OFF)
option(USE_PROFILING "enable c++ profling" OFF) option(USE_PROFILING "enable c++ profling" OFF)
option(WITH_TESTING "unit test" ON) option(WITH_TESTING "unit test" ON)
@ -47,30 +59,39 @@ option(WITH_TESTING "unit test" ON)
option(USING_GPU "u2 compute on GPU." OFF) option(USING_GPU "u2 compute on GPU." OFF)
############################################################################### ###############################################################################
# Include third party # Include Third Party
############################################################################### ###############################################################################
include(gflags) include(gflags)
include(glog) include(glog)
# openfst
include(openfst)
add_dependencies(openfst gflags glog)
# paddle lib
include(paddleinference)
# gtest # gtest
if(WITH_TESTING) if(WITH_TESTING)
include(gtest) # download, build, install gtest include(gtest) # download, build, install gtest
endif() endif()
# fastdeploy
include(fastdeploy)
if(WITH_ASR)
# openfst
include(openfst)
add_dependencies(openfst gflags glog)
endif()
###############################################################################
# Find Package
###############################################################################
# python/pybind11/threads # python/pybind11/threads
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
# https://cmake.org/cmake/help/latest/module/FindPython3.html#module:FindPython3 # https://cmake.org/cmake/help/latest/module/FindPython3.html#module:FindPython3
find_package(Python3 COMPONENTS Interpreter Development) find_package(Python3 COMPONENTS Interpreter Development)
find_package(pybind11 CONFIG) find_package(pybind11 CONFIG)
if(WITH_ASR)
if(Python3_FOUND) if(Python3_FOUND)
message(STATUS "Python3_FOUND = ${Python3_FOUND}") message(STATUS "Python3_FOUND = ${Python3_FOUND}")
message(STATUS "Python3_EXECUTABLE = ${Python3_EXECUTABLE}") message(STATUS "Python3_EXECUTABLE = ${Python3_EXECUTABLE}")
@ -138,11 +159,17 @@ out=':'.join([libs_dir, fluid_dir]); print(out); \
" "
OUTPUT_VARIABLE PADDLE_LIB_DIRS) OUTPUT_VARIABLE PADDLE_LIB_DIRS)
message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS}) message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
endif()
add_compile_options(-fPIC)
############################################################################### ###############################################################################
# Add local library # Add local library
############################################################################### ###############################################################################
set(ENGINE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/engine) set(ENGINE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/engine)
message(STATUS "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}")
message(STATUS "CMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}")
message(STATUS "CMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}")
add_subdirectory(engine) add_subdirectory(engine)

@ -4,5 +4,5 @@ set -xe
# the build script had verified in the paddlepaddle docker image. # the build script had verified in the paddlepaddle docker image.
# please follow the instruction below to install PaddlePaddle image. # please follow the instruction below to install PaddlePaddle image.
# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html # https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html
cmake -B build cmake -B build -DWITH_ASR=OFF -DWITH_CLS=OFF
cmake --build build -j cmake --build build -j

@ -8,11 +8,11 @@ windows_x86")
set(CMAKE_VERBOSE_MAKEFILE ON) set(CMAKE_VERBOSE_MAKEFILE ON)
set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy) set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy)
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz) if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz)
exec_program("mkdir -p ${FASTDEPLOY_DIR} && exec_program("mkdir -p ${FASTDEPLOY_DIR} &&
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz -P ${FASTDEPLOY_DIR} && wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz -P ${FASTDEPLOY_DIR} &&
tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz -C ${FASTDEPLOY_DIR} && tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz -C ${FASTDEPLOY_DIR} &&
mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2 ${FASTDEPLOY_DIR}/linux-x64") mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4 ${FASTDEPLOY_DIR}/linux-x64")
endif() endif()
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz) if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz)
@ -36,4 +36,9 @@ elseif (ARCH STREQUAL "android_armv7")
endif() endif()
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# fix compiler flags conflict, since fastdeploy using c++11 for project
set(CMAKE_CXX_STANDARD 14)
include_directories(${FASTDEPLOY_INCS}) include_directories(${FASTDEPLOY_INCS})
message(STATUS "FASTDEPLOY_INCS=${FASTDEPLOY_INCS}")

@ -6,8 +6,19 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kaldi) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kaldi)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common)
add_subdirectory(asr)
add_subdirectory(common)
add_subdirectory(kaldi) add_subdirectory(kaldi)
add_subdirectory(codelab) add_subdirectory(common)
if(WITH_ASR)
add_subdirectory(asr)
endif()
if(WITH_CLS)
add_subdirectory(cls) add_subdirectory(cls)
endif()
if(WITH_VAD)
add_subdirectory(vad)
endif()
add_subdirectory(codelab)

@ -38,7 +38,8 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
decoder_ = std::make_unique<CTCPrefixBeamSearch>( decoder_ = std::make_unique<CTCPrefixBeamSearch>(
resource.vocab_path, resource.decoder_opts.ctc_prefix_search_opts); resource.vocab_path, resource.decoder_opts.ctc_prefix_search_opts);
} else { } else {
decoder_ = std::make_unique<TLGDecoder>(resource.decoder_opts.tlg_decoder_opts); decoder_ = std::make_unique<TLGDecoder>(
resource.decoder_opts.tlg_decoder_opts);
} }
symbol_table_ = decoder_->WordSymbolTable(); symbol_table_ = decoder_->WordSymbolTable();

@ -3,7 +3,7 @@ ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/../ ${CMAKE_CURRENT_SOURCE_DIR}/../
) )
add_subdirectory(utils) add_subdirectory(utils)
add_subdirectory(base)
add_subdirectory(matrix) add_subdirectory(matrix)
include_directories( include_directories(

@ -0,0 +1,20 @@
if(WITH_ASR)
add_compile_options(-DWITH_ASR)
set(PPS_FLAGS_LIB "fst/flags.h")
set(PPS_GLOB_LIB "fst/log.h")
else()
set(PPS_FLAGS_LIB "gflags/gflags.h")
set(PPS_GLOB_LIB "glog/logging.h")
endif()
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/flags.h.in
${CMAKE_CURRENT_SOURCE_DIR}/flags.h @ONLY
)
message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/flags.h")
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/log.h.in
${CMAKE_CURRENT_SOURCE_DIR}/log.h @ONLY
)
message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/log.h")

@ -14,4 +14,4 @@
#pragma once #pragma once
#include "fst/flags.h" #include "@PPS_FLAGS_LIB@"

@ -14,4 +14,4 @@
#pragma once #pragma once
#include "fst/log.h" #include "@PPS_GLOB_LIB@"

@ -33,7 +33,7 @@ CMVN::CMVN(std::string cmvn_file, unique_ptr<FrontendInterface> base_extractor)
dim_ = mean_stats_.size() - 1; dim_ = mean_stats_.size() - 1;
} }
void CMVN::ReadCMVNFromJson(string cmvn_file) { void CMVN::ReadCMVNFromJson(std::string cmvn_file) {
std::string json_str = ppspeech::ReadFile2String(cmvn_file); std::string json_str = ppspeech::ReadFile2String(cmvn_file);
picojson::value value; picojson::value value;
std::string err; std::string err;

@ -21,6 +21,7 @@
#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_ #ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_ #define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
#include <limits>
#include <map> #include <map>
#include "frontend/feature-window.h" #include "frontend/feature-window.h"

@ -7,6 +7,7 @@
#include "frontend/feature-window.h" #include "frontend/feature-window.h"
#include <cmath> #include <cmath>
#include <limits>
#include <vector> #include <vector>
#ifndef M_2PI #ifndef M_2PI

@ -17,12 +17,12 @@
*/ */
#include "frontend/rfft.h" #include "frontend/rfft.h"
#include "base/log.h"
#include <cmath> #include <cmath>
#include <memory>
#include <vector> #include <vector>
#include "base/log.h"
// see fftsg.c // see fftsg.c
#ifdef __cplusplus #ifdef __cplusplus
extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w); extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);

@ -31,11 +31,13 @@ Matrix<Real>::Matrix(): MatrixBase<Real>(NULL, 0, 0, 0) { }
/* /*
template<> template<>
template<> template<>
void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra, const VectorBase<float> &rb); void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float>
&ra, const VectorBase<float> &rb);
template<> template<>
template<> template<>
void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double> &ra, const VectorBase<double> &rb); void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double>
&ra, const VectorBase<double> &rb);
*/ */
template <typename Real> template <typename Real>
@ -61,4 +63,3 @@ inline std::istream & operator >> (std::istream & is, MatrixBase<Real> & M) {
#endif // KALDI_MATRIX_KALDI_MATRIX_INL_H_ #endif // KALDI_MATRIX_KALDI_MATRIX_INL_H_

File diff suppressed because it is too large Load Diff

@ -62,9 +62,7 @@ class MatrixBase {
} }
/// Gives pointer to raw data (const). /// Gives pointer to raw data (const).
inline const Real* Data() const { inline const Real *Data() const { return data_; }
return data_;
}
/// Gives pointer to raw data (non-const). /// Gives pointer to raw data (non-const).
inline Real *Data() { return data_; } inline Real *Data() { return data_; }
@ -86,7 +84,8 @@ class MatrixBase {
/// Indexing operator, non-const /// Indexing operator, non-const
/// (only checks sizes if compiled with -DKALDI_PARANOID) /// (only checks sizes if compiled with -DKALDI_PARANOID)
inline Real &operator()(MatrixIndexT r, MatrixIndexT c) { inline Real &operator()(MatrixIndexT r, MatrixIndexT c) {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) < KALDI_PARANOID_ASSERT(
static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) && static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) < static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_cols_)); static_cast<UnsignedMatrixIndexT>(num_cols_));
@ -99,7 +98,8 @@ class MatrixBase {
/// Indexing operator, const /// Indexing operator, const
/// (only checks sizes if compiled with -DKALDI_PARANOID) /// (only checks sizes if compiled with -DKALDI_PARANOID)
inline const Real operator()(MatrixIndexT r, MatrixIndexT c) const { inline const Real operator()(MatrixIndexT r, MatrixIndexT c) const {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) < KALDI_PARANOID_ASSERT(
static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) && static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) < static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_cols_)); static_cast<UnsignedMatrixIndexT>(num_cols_));
@ -138,7 +138,8 @@ class MatrixBase {
/// if v.Dim() == NumCols(), it sets each row of *this to a copy of v. /// if v.Dim() == NumCols(), it sets each row of *this to a copy of v.
void CopyRowsFromVec(const VectorBase<Real> &v); void CopyRowsFromVec(const VectorBase<Real> &v);
/// This version of CopyRowsFromVec is implemented in ../cudamatrix/cu-vector.cc /// This version of CopyRowsFromVec is implemented in
/// ../cudamatrix/cu-vector.cc
// void CopyRowsFromVec(const CuVectorBase<Real> &v); // void CopyRowsFromVec(const CuVectorBase<Real> &v);
template <typename OtherReal> template <typename OtherReal>
@ -177,8 +178,8 @@ class MatrixBase {
const MatrixIndexT num_rows, const MatrixIndexT num_rows,
const MatrixIndexT col_offset, const MatrixIndexT col_offset,
const MatrixIndexT num_cols) const { const MatrixIndexT num_cols) const {
return SubMatrix<Real>(*this, row_offset, num_rows, return SubMatrix<Real>(
col_offset, num_cols); *this, row_offset, num_rows, col_offset, num_cols);
} }
inline SubMatrix<Real> RowRange(const MatrixIndexT row_offset, inline SubMatrix<Real> RowRange(const MatrixIndexT row_offset,
const MatrixIndexT num_rows) const { const MatrixIndexT num_rows) const {
@ -223,7 +224,8 @@ class MatrixBase {
/// each row by a scalar taken from that dimension of the vector. /// each row by a scalar taken from that dimension of the vector.
void MulRowsVec(const VectorBase<Real> &scale); void MulRowsVec(const VectorBase<Real> &scale);
/// Divide each row into src.NumCols() equal groups, and then scale i'th row's /// Divide each row into src.NumCols() equal groups, and then scale i'th
row's
/// j'th group of elements by src(i, j). Requires src.NumRows() == /// j'th group of elements by src(i, j). Requires src.NumRows() ==
/// this->NumRows() and this->NumCols() % src.NumCols() == 0. /// this->NumRows() and this->NumCols() % src.NumCols() == 0.
void MulRowsGroupMat(const MatrixBase<Real> &src); void MulRowsGroupMat(const MatrixBase<Real> &src);
@ -256,15 +258,13 @@ class MatrixBase {
/// As a special case, if indexes[i] == -1, sets column i to zero. /// As a special case, if indexes[i] == -1, sets column i to zero.
/// all elements of "indices" must be in [-1, src.NumCols()-1], /// all elements of "indices" must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this.NumRows() /// and src.NumRows() must equal this.NumRows()
void CopyCols(const MatrixBase<Real> &src, void CopyCols(const MatrixBase<Real> &src, const MatrixIndexT *indices);
const MatrixIndexT *indices);
/// Copies row r from row indices[r] of src (does nothing /// Copies row r from row indices[r] of src (does nothing
/// As a special case, if indexes[i] == -1, sets row i to zero. /// As a special case, if indexes[i] == -1, sets row i to zero.
/// all elements of "indices" must be in [-1, src.NumRows()-1], /// all elements of "indices" must be in [-1, src.NumRows()-1],
/// and src.NumCols() must equal this.NumCols() /// and src.NumCols() must equal this.NumCols()
void CopyRows(const MatrixBase<Real> &src, void CopyRows(const MatrixBase<Real> &src, const MatrixIndexT *indices);
const MatrixIndexT *indices);
/// Add column indices[r] of src to column r. /// Add column indices[r] of src to column r.
/// As a special case, if indexes[i] == -1, skip column i /// As a special case, if indexes[i] == -1, skip column i
@ -274,26 +274,30 @@ class MatrixBase {
// void AddCols(const MatrixBase<Real> &src, // void AddCols(const MatrixBase<Real> &src,
// const MatrixIndexT *indices); // const MatrixIndexT *indices);
/// Copies row r of this matrix from an array of floats at the location given /// Copies row r of this matrix from an array of floats at the location
/// given
/// by src[r]. If any src[r] is NULL then this.Row(r) will be set to zero. /// by src[r]. If any src[r] is NULL then this.Row(r) will be set to zero.
/// Note: we are using "pointer to const pointer to const object" for "src", /// Note: we are using "pointer to const pointer to const object" for "src",
/// because we may create "src" by calling Data() of const CuArray /// because we may create "src" by calling Data() of const CuArray
void CopyRows(const Real *const *src); void CopyRows(const Real *const *src);
/// Copies row r of this matrix to the array of floats at the location given /// Copies row r of this matrix to the array of floats at the location given
/// by dst[r]. If dst[r] is NULL, does not copy anywhere. Requires that none /// by dst[r]. If dst[r] is NULL, does not copy anywhere. Requires that
/// none
/// of the memory regions pointed to by the pointers in "dst" overlap (e.g. /// of the memory regions pointed to by the pointers in "dst" overlap (e.g.
/// none of the pointers should be the same). /// none of the pointers should be the same).
void CopyToRows(Real *const *dst) const; void CopyToRows(Real *const *dst) const;
/// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]). /// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]).
/// If indexes[r] < 0, does not add anything. all elements of "indexes" must /// If indexes[r] < 0, does not add anything. all elements of "indexes" must
/// be in [-1, src.NumRows()-1], and src.NumCols() must equal this.NumCols(). /// be in [-1, src.NumRows()-1], and src.NumCols() must equal
/// this.NumCols().
// void AddRows(Real alpha, // void AddRows(Real alpha,
// const MatrixBase<Real> &src, // const MatrixBase<Real> &src,
// const MatrixIndexT *indexes); // const MatrixIndexT *indexes);
/// Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as the /// Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as
/// the
/// beginning of a region of memory representing a vector of floats, of the /// beginning of a region of memory representing a vector of floats, of the
/// same length as this.NumCols(). If src[r] is NULL, does not add anything. /// same length as this.NumCols(). If src[r] is NULL, does not add anything.
// void AddRows(Real alpha, const Real *const *src); // void AddRows(Real alpha, const Real *const *src);
@ -350,48 +354,63 @@ class MatrixBase {
this -> Log(*this); this -> Log(*this);
} }
*/ */
/// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D /// Eigenvalue Decomposition of a square NxN matrix into the form (*this) =
/// P^{-1}. Be careful: the relationship of D to the eigenvalues we output is /// P D
/// slightly complicated, due to the need for P to be real. In the symmetric /// P^{-1}. Be careful: the relationship of D to the eigenvalues we output
/// is
/// slightly complicated, due to the need for P to be real. In the
/// symmetric
/// case D is diagonal and real, but in /// case D is diagonal and real, but in
/// the non-symmetric case there may be complex-conjugate pairs of eigenvalues. /// the non-symmetric case there may be complex-conjugate pairs of
/// In this case, for the equation (*this) = P D P^{-1} to hold, D must actually /// eigenvalues.
/// be block diagonal, with 2x2 blocks corresponding to any such pairs. If a /// In this case, for the equation (*this) = P D P^{-1} to hold, D must
/// actually
/// be block diagonal, with 2x2 blocks corresponding to any such pairs. If
/// a
/// pair is lambda +- i*mu, D will have a corresponding 2x2 block /// pair is lambda +- i*mu, D will have a corresponding 2x2 block
/// [lambda, mu; -mu, lambda]. /// [lambda, mu; -mu, lambda].
/// Note that if the input matrix (*this) is non-invertible, P may not be invertible /// Note that if the input matrix (*this) is non-invertible, P may not be
/// so in this case instead of the equation (*this) = P D P^{-1} holding, we have /// invertible
/// so in this case instead of the equation (*this) = P D P^{-1} holding, we
/// have
/// instead (*this) P = P D. /// instead (*this) P = P D.
/// ///
/// The non-member function CreateEigenvalueMatrix creates D from eigs_real and eigs_imag. /// The non-member function CreateEigenvalueMatrix creates D from eigs_real
/// and eigs_imag.
// void Eig(MatrixBase<Real> *P, // void Eig(MatrixBase<Real> *P,
// VectorBase<Real> *eigs_real, // VectorBase<Real> *eigs_real,
// VectorBase<Real> *eigs_imag) const; // VectorBase<Real> *eigs_imag) const;
/// The Power method attempts to take the matrix to a power using a method that /// The Power method attempts to take the matrix to a power using a method
/// works in general for fractional and negative powers. The input matrix must /// that
/// works in general for fractional and negative powers. The input matrix
/// must
/// be invertible and have reasonable condition (or we don't guarantee the /// be invertible and have reasonable condition (or we don't guarantee the
/// results. The method is based on the eigenvalue decomposition. It will /// results. The method is based on the eigenvalue decomposition. It will
/// return false and leave the matrix unchanged, if at entry the matrix had /// return false and leave the matrix unchanged, if at entry the matrix had
/// real negative eigenvalues (or if it had zero eigenvalues and the power was /// real negative eigenvalues (or if it had zero eigenvalues and the power
/// was
/// negative). /// negative).
// bool Power(Real pow); // bool Power(Real pow);
/** Singular value decomposition /** Singular value decomposition
Major limitations: Major limitations:
For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we return For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we
return
the "skinny" Svd, i.e. the matrix in the middle is diagonal, and the the "skinny" Svd, i.e. the matrix in the middle is diagonal, and the
one on the left is rectangular. one on the left is rectangular.
In Svd, *this = U*diag(S)*Vt. In Svd, *this = U*diag(S)*Vt.
Null pointers for U and/or Vt at input mean we do not want that output. We Null pointers for U and/or Vt at input mean we do not want that output.
We
expect that S.Dim() == m, U is either NULL or m by n, expect that S.Dim() == m, U is either NULL or m by n,
and v is either NULL or n by n. and v is either NULL or n by n.
The singular values are not sorted (use SortSvd for that). */ The singular values are not sorted (use SortSvd for that). */
// void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U, // void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
// MatrixBase<Real> *Vt); // Destroys calling matrix. // MatrixBase<Real> *Vt); // Destroys calling matrix.
/// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is already /// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is
/// already
/// transposed; the normal formulation is U diag(s) V^T. /// transposed; the normal formulation is U diag(s) V^T.
/// Null pointers for U or V mean we don't want that output (this saves /// Null pointers for U or V mean we don't want that output (this saves
/// compute). The singular values are not sorted (use SortSvd for that). /// compute). The singular values are not sorted (use SortSvd for that).
@ -408,7 +427,8 @@ class MatrixBase {
// return tmp.Min(); // return tmp.Min();
//} //}
//void TestUninitialized() const; // This function is designed so that if any element // void TestUninitialized() const; // This function is designed so that if
// any element
// if the matrix is uninitialized memory, valgrind will complain. // if the matrix is uninitialized memory, valgrind will complain.
/// Returns condition number by computing Svd. Works even if cols > rows. /// Returns condition number by computing Svd. Works even if cols > rows.
@ -422,16 +442,19 @@ class MatrixBase {
/// Returns true if matrix is Diagonal. /// Returns true if matrix is Diagonal.
bool IsDiagonal(Real cutoff = 1.0e-05) const; // replace magic number bool IsDiagonal(Real cutoff = 1.0e-05) const; // replace magic number
/// Returns true if the matrix is all zeros, except for ones on diagonal. (it /// Returns true if the matrix is all zeros, except for ones on diagonal.
(it
/// does not have to be square). More specifically, this function returns /// does not have to be square). More specifically, this function returns
/// false if for any i, j, (*this)(i, j) differs by more than cutoff from the /// false if for any i, j, (*this)(i, j) differs by more than cutoff from
the
/// expression (i == j ? 1 : 0). /// expression (i == j ? 1 : 0).
bool IsUnit(Real cutoff = 1.0e-05) const; // replace magic number bool IsUnit(Real cutoff = 1.0e-05) const; // replace magic number
/// Returns true if matrix is all zeros. /// Returns true if matrix is all zeros.
bool IsZero(Real cutoff = 1.0e-05) const; // replace magic number bool IsZero(Real cutoff = 1.0e-05) const; // replace magic number
/// Frobenius norm, which is the sqrt of sum of square elements. Same as Schatten 2-norm, /// Frobenius norm, which is the sqrt of sum of square elements. Same as
Schatten 2-norm,
/// or just "2-norm". /// or just "2-norm".
Real FrobeniusNorm() const; Real FrobeniusNorm() const;
@ -461,7 +484,8 @@ class MatrixBase {
/// Sets each element to the Heaviside step function (x > 0 ? 1 : 0) of the /// Sets each element to the Heaviside step function (x > 0 ? 1 : 0) of the
/// corresponding element in "src". Note: in general you can make different /// corresponding element in "src". Note: in general you can make different
/// choices for x = 0, but for now please leave it as it (i.e. returning zero) /// choices for x = 0, but for now please leave it as it (i.e. returning
zero)
/// because it affects the RectifiedLinearComponent in the neural net code. /// because it affects the RectifiedLinearComponent in the neural net code.
void Heaviside(const MatrixBase<Real> &src); void Heaviside(const MatrixBase<Real> &src);
@ -477,7 +501,8 @@ class MatrixBase {
/// If the power is negative and the input to the power is zero, /// If the power is negative and the input to the power is zero,
/// The output will be set zero. If include_sign is true, it will /// The output will be set zero. If include_sign is true, it will
/// multiply the result by the sign of the input. /// multiply the result by the sign of the input.
void PowAbs(const MatrixBase<Real> &src, Real power, bool include_sign=false); void PowAbs(const MatrixBase<Real> &src, Real power, bool
include_sign=false);
void Floor(const MatrixBase<Real> &src, Real floor_val); void Floor(const MatrixBase<Real> &src, Real floor_val);
@ -492,36 +517,52 @@ class MatrixBase {
/// Floor(src, lower_limit); /// Floor(src, lower_limit);
/// Ceiling(src, upper_limit); /// Ceiling(src, upper_limit);
/// Exp(src) /// Exp(src)
void ExpLimited(const MatrixBase<Real> &src, Real lower_limit, Real upper_limit); void ExpLimited(const MatrixBase<Real> &src, Real lower_limit, Real
upper_limit);
/// Set each element to y = log(1 + exp(x)) /// Set each element to y = log(1 + exp(x))
void SoftHinge(const MatrixBase<Real> &src); void SoftHinge(const MatrixBase<Real> &src);
/// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 / p). /// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 /
/// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0. p).
/// Requires src.NumRows() == this->NumRows() and src.NumCols() %
this->NumCols() == 0.
void GroupPnorm(const MatrixBase<Real> &src, Real power); void GroupPnorm(const MatrixBase<Real> &src, Real power);
/// Calculate derivatives for the GroupPnorm function above... /// Calculate derivatives for the GroupPnorm function above...
/// if "input" is the input to the GroupPnorm function above (i.e. the "src" variable), /// if "input" is the input to the GroupPnorm function above (i.e. the "src"
/// and "output" is the result of the computation (i.e. the "this" of that function variable),
/// call), and *this has the same dimension as "input", then it sets each element /// and "output" is the result of the computation (i.e. the "this" of that
/// of *this to the derivative d(output-elem)/d(input-elem) for each element of "input", where function
/// "output-elem" is whichever element of output depends on that input element. /// call), and *this has the same dimension as "input", then it sets each
void GroupPnormDeriv(const MatrixBase<Real> &input, const MatrixBase<Real> &output, element
/// of *this to the derivative d(output-elem)/d(input-elem) for each element
of "input", where
/// "output-elem" is whichever element of output depends on that input
element.
void GroupPnormDeriv(const MatrixBase<Real> &input, const MatrixBase<Real>
&output,
Real power); Real power);
/// Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j /// Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j
/// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0. /// Requires src.NumRows() == this->NumRows() and src.NumCols() %
this->NumCols() == 0.
void GroupMax(const MatrixBase<Real> &src); void GroupMax(const MatrixBase<Real> &src);
/// Calculate derivatives for the GroupMax function above, where /// Calculate derivatives for the GroupMax function above, where
/// "input" is the input to the GroupMax function above (i.e. the "src" variable), /// "input" is the input to the GroupMax function above (i.e. the "src"
/// and "output" is the result of the computation (i.e. the "this" of that function variable),
/// and "output" is the result of the computation (i.e. the "this" of that
function
/// call), and *this must have the same dimension as "input". Each element /// call), and *this must have the same dimension as "input". Each element
/// of *this will be set to 1 if the corresponding input equals the output of /// of *this will be set to 1 if the corresponding input equals the output
/// the group, and 0 otherwise. The equals the function derivative where it is of
/// defined (it's not defined where multiple inputs in the group are equal to the output). /// the group, and 0 otherwise. The equals the function derivative where it
void GroupMaxDeriv(const MatrixBase<Real> &input, const MatrixBase<Real> &output); is
/// defined (it's not defined where multiple inputs in the group are equal
to the output).
void GroupMaxDeriv(const MatrixBase<Real> &input, const MatrixBase<Real>
&output);
/// Set each element to the tanh of the corresponding element of "src". /// Set each element to the tanh of the corresponding element of "src".
void Tanh(const MatrixBase<Real> &src); void Tanh(const MatrixBase<Real> &src);
@ -540,11 +581,14 @@ class MatrixBase {
* semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an * semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an
* orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not * orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not
* positive semi-definite (check_thresh controls how stringent the check is; * positive semi-definite (check_thresh controls how stringent the check is;
* set it to 2 to ensure it won't ever complain, but it will zero out negative * set it to 2 to ensure it won't ever complain, but it will zero out
* negative
* dimensions in your matrix. * dimensions in your matrix.
* *
* Caution: if you want the eigenvalues, it may make more sense to convert to * Caution: if you want the eigenvalues, it may make more sense to convert
* SpMatrix and use Eig() function there, which uses eigenvalue decomposition * to
* SpMatrix and use Eig() function there, which uses eigenvalue
* decomposition
* directly rather than SVD. * directly rather than SVD.
*/ */
@ -557,18 +601,18 @@ class MatrixBase {
// Below is internal methods for Svd, user does not have to know about this. // Below is internal methods for Svd, user does not have to know about this.
protected: protected:
/// Initializer, callable only from child. /// Initializer, callable only from child.
explicit MatrixBase(Real *data, MatrixIndexT cols, MatrixIndexT rows, MatrixIndexT stride) : explicit MatrixBase(Real *data,
data_(data), num_cols_(cols), num_rows_(rows), stride_(stride) { MatrixIndexT cols,
MatrixIndexT rows,
MatrixIndexT stride)
: data_(data), num_cols_(cols), num_rows_(rows), stride_(stride) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real); KALDI_ASSERT_IS_FLOATING_TYPE(Real);
} }
/// Initializer, callable only from child. /// Initializer, callable only from child.
/// Empty initializer, for un-initialized matrix. /// Empty initializer, for un-initialized matrix.
explicit MatrixBase(): data_(NULL) { explicit MatrixBase() : data_(NULL) { KALDI_ASSERT_IS_FLOATING_TYPE(Real); }
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
// Make sure pointers to MatrixBase cannot be deleted. // Make sure pointers to MatrixBase cannot be deleted.
~MatrixBase() {} ~MatrixBase() {}
@ -578,9 +622,7 @@ class MatrixBase {
/// "public const" inheritance or anything like that, so it would require /// "public const" inheritance or anything like that, so it would require
/// a lot of work to make the SubMatrix class totally const-correct-- /// a lot of work to make the SubMatrix class totally const-correct--
/// we would have to override many of the Matrix functions. /// we would have to override many of the Matrix functions.
inline Real* Data_workaround() const { inline Real *Data_workaround() const { return data_; }
return data_;
}
/// data memory area /// data memory area
Real *data_; Real *data_;
@ -592,6 +634,7 @@ class MatrixBase {
/** True number of columns for the internal matrix. This number may differ /** True number of columns for the internal matrix. This number may differ
* from num_cols_ as memory alignment might be used. */ * from num_cols_ as memory alignment might be used. */
MatrixIndexT stride_; MatrixIndexT stride_;
private: private:
KALDI_DISALLOW_COPY_AND_ASSIGN(MatrixBase); KALDI_DISALLOW_COPY_AND_ASSIGN(MatrixBase);
}; };
@ -600,15 +643,17 @@ class MatrixBase {
template <typename Real> template <typename Real>
class Matrix : public MatrixBase<Real> { class Matrix : public MatrixBase<Real> {
public: public:
/// Empty constructor. /// Empty constructor.
Matrix(); Matrix();
/// Basic constructor. /// Basic constructor.
Matrix(const MatrixIndexT r, const MatrixIndexT c, Matrix(const MatrixIndexT r,
const MatrixIndexT c,
MatrixResizeType resize_type = kSetZero, MatrixResizeType resize_type = kSetZero,
MatrixStrideType stride_type = kDefaultStride): MatrixStrideType stride_type = kDefaultStride)
MatrixBase<Real>() { Resize(r, c, resize_type, stride_type); } : MatrixBase<Real>() {
Resize(r, c, resize_type, stride_type);
}
/// Swaps the contents of *this and *other. Shallow swap. /// Swaps the contents of *this and *other. Shallow swap.
void Swap(Matrix<Real> *other); void Swap(Matrix<Real> *other);
@ -694,13 +739,14 @@ class Matrix : public MatrixBase<Real> {
void Destroy(); void Destroy();
/// Init assumes the current class contents are invalid (i.e. junk or have /// Init assumes the current class contents are invalid (i.e. junk or have
/// already been freed), and it sets the matrix to newly allocated memory with /// already been freed), and it sets the matrix to newly allocated memory
/// the specified number of rows and columns. r == c == 0 is acceptable. The data /// with
/// the specified number of rows and columns. r == c == 0 is acceptable.
/// The data
/// memory contents will be undefined. /// memory contents will be undefined.
void Init(const MatrixIndexT r, void Init(const MatrixIndexT r,
const MatrixIndexT c, const MatrixIndexT c,
const MatrixStrideType stride_type); const MatrixStrideType stride_type);
}; };
/// @} end "addtogroup matrix_group" /// @} end "addtogroup matrix_group"
@ -735,9 +781,9 @@ class SubMatrix : public MatrixBase<Real> {
/// This type of constructor is needed for Range() to work [in Matrix base /// This type of constructor is needed for Range() to work [in Matrix base
/// class]. Cannot make it explicit. /// class]. Cannot make it explicit.
SubMatrix<Real> (const SubMatrix &other): SubMatrix<Real>(const SubMatrix &other)
MatrixBase<Real> (other.data_, other.num_cols_, other.num_rows_, : MatrixBase<Real>(
other.stride_) {} other.data_, other.num_cols_, other.num_rows_, other.stride_) {}
private: private:
/// Disallow assignment. /// Disallow assignment.
@ -794,25 +840,33 @@ Real TraceMatMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
/// the same as U->NumCols(), and we sort s from greatest to least absolute /// the same as U->NumCols(), and we sort s from greatest to least absolute
/// value (if sort_on_absolute_value == true) or greatest to least value /// value (if sort_on_absolute_value == true) or greatest to least value
/// otherwise, moving the columns of U, if it exists, and the rows of Vt, if it /// otherwise, moving the columns of U, if it exists, and the rows of Vt, if it
/// exists, around in the same way. Note: the "absolute value" part won't matter /// exists, around in the same way. Note: the "absolute value" part won't
matter
/// if this is an actual SVD, since singular values are non-negative. /// if this is an actual SVD, since singular values are non-negative.
template<typename Real> void SortSvd(VectorBase<Real> *s, MatrixBase<Real> *U, template<typename Real> void SortSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real>* Vt = NULL, MatrixBase<Real>* Vt = NULL,
bool sort_on_absolute_value = true); bool sort_on_absolute_value = true);
/// Creates the eigenvalue matrix D that is part of the decomposition used Matrix::Eig. /// Creates the eigenvalue matrix D that is part of the decomposition used
Matrix::Eig.
/// D will be block-diagonal with blocks of size 1 (for real eigenvalues) or 2x2 /// D will be block-diagonal with blocks of size 1 (for real eigenvalues) or 2x2
/// for complex pairs. If a complex pair is lambda +- i*mu, D will have a corresponding /// for complex pairs. If a complex pair is lambda +- i*mu, D will have a
corresponding
/// 2x2 block [lambda, mu; -mu, lambda]. /// 2x2 block [lambda, mu; -mu, lambda].
/// This function will throw if any complex eigenvalues are not in complex conjugate /// This function will throw if any complex eigenvalues are not in complex
conjugate
/// pairs (or the members of such pairs are not consecutively numbered). /// pairs (or the members of such pairs are not consecutively numbered).
template<typename Real> template<typename Real>
void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real> &imag, void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real>
&imag,
MatrixBase<Real> *D); MatrixBase<Real> *D);
/// The following function is used in Matrix::Power, and separately tested, so we /// The following function is used in Matrix::Power, and separately tested, so
/// declare it here mainly for the testing code to see. It takes a complex value to we
/// a power using a method that will work for noninteger powers (but will fail if the /// declare it here mainly for the testing code to see. It takes a complex
value to
/// a power using a method that will work for noninteger powers (but will fail
if the
/// complex value is real and negative). /// complex value is real and negative).
template<typename Real> template<typename Real>
bool AttemptComplexPower(Real *x_re, Real *x_im, Real power); bool AttemptComplexPower(Real *x_re, Real *x_im, Real power);
@ -844,7 +898,6 @@ bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) {
} // namespace kaldi } // namespace kaldi
// we need to include the implementation and some // we need to include the implementation and some
// template specializations. // template specializations.
#include "matrix/kaldi-matrix-inl.h" #include "matrix/kaldi-matrix-inl.h"

@ -46,7 +46,8 @@ std::istream &operator >> (std::istream &is, Vector<Real> &rv) {
// template<> // template<>
// template<> // template<>
//void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv); // void VectorBase<float>::AddVec(const float alpha, const VectorBase<float>
// &rv);
// template<> // template<>
// template<> // template<>

@ -23,9 +23,9 @@
// See the Apache 2 License for the specific language governing permissions and // See the Apache 2 License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "matrix/kaldi-vector.h"
#include <algorithm> #include <algorithm>
#include <string> #include <string>
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h" #include "matrix/kaldi-matrix.h"
namespace kaldi { namespace kaldi {
@ -54,19 +54,24 @@ inline void Vector<Real>::Init(const MatrixIndexT dim) {
template <typename Real> template <typename Real>
void Vector<Real>::Resize(const MatrixIndexT dim, MatrixResizeType resize_type) { void Vector<Real>::Resize(const MatrixIndexT dim,
MatrixResizeType resize_type) {
// the next block uses recursion to handle what we have to do if // the next block uses recursion to handle what we have to do if
// resize_type == kCopyData. // resize_type == kCopyData.
if (resize_type == kCopyData) { if (resize_type == kCopyData) {
if (this->data_ == NULL || dim == 0) resize_type = kSetZero; // nothing to copy. if (this->data_ == NULL || dim == 0)
else if (this->dim_ == dim) { return; } // nothing to do. resize_type = kSetZero; // nothing to copy.
else if (this->dim_ == dim) {
return;
} // nothing to do.
else { else {
// set tmp to a vector of the desired size. // set tmp to a vector of the desired size.
Vector<Real> tmp(dim, kUndefined); Vector<Real> tmp(dim, kUndefined);
if (dim > this->dim_) { if (dim > this->dim_) {
memcpy(tmp.data_, this->data_, sizeof(Real) * this->dim_); memcpy(tmp.data_, this->data_, sizeof(Real) * this->dim_);
memset(tmp.data_+this->dim_, 0, sizeof(Real)*(dim-this->dim_)); memset(tmp.data_ + this->dim_,
0,
sizeof(Real) * (dim - this->dim_));
} else { } else {
memcpy(tmp.data_, this->data_, sizeof(Real) * dim); memcpy(tmp.data_, this->data_, sizeof(Real) * dim);
} }
@ -107,10 +112,14 @@ void VectorBase<Real>::CopyFromPacked(const PackedMatrix<OtherReal>& M) {
this->CopyFromVec(v); this->CopyFromVec(v);
} }
// instantiate the template. // instantiate the template.
template void VectorBase<float>::CopyFromPacked(const PackedMatrix<double> &other); template void VectorBase<float>::CopyFromPacked(const PackedMatrix<double>
template void VectorBase<float>::CopyFromPacked(const PackedMatrix<float> &other); &other);
template void VectorBase<double>::CopyFromPacked(const PackedMatrix<double> &other); template void VectorBase<float>::CopyFromPacked(const PackedMatrix<float>
template void VectorBase<double>::CopyFromPacked(const PackedMatrix<float> &other); &other);
template void VectorBase<double>::CopyFromPacked(const PackedMatrix<double>
&other);
template void VectorBase<double>::CopyFromPacked(const PackedMatrix<float>
&other);
/// Load data into the vector /// Load data into the vector
template<typename Real> template<typename Real>
@ -125,8 +134,7 @@ void VectorBase<Real>::CopyFromVec(const VectorBase<OtherReal> &other) {
KALDI_ASSERT(dim_ == other.Dim()); KALDI_ASSERT(dim_ == other.Dim());
Real *__restrict__ ptr = data_; Real *__restrict__ ptr = data_;
const OtherReal *__restrict__ other_ptr = other.Data(); const OtherReal *__restrict__ other_ptr = other.Data();
for (MatrixIndexT i = 0; i < dim_; i++) for (MatrixIndexT i = 0; i < dim_; i++) ptr[i] = other_ptr[i];
ptr[i] = other_ptr[i];
} }
template void VectorBase<float>::CopyFromVec(const VectorBase<double> &other); template void VectorBase<float>::CopyFromVec(const VectorBase<double> &other);
@ -146,8 +154,7 @@ void Vector<Real>::RemoveElement(MatrixIndexT i) {
template <typename Real> template <typename Real>
void Vector<Real>::Destroy() { void Vector<Real>::Destroy() {
/// we need to free the data block if it was defined /// we need to free the data block if it was defined
if (this->data_ != NULL) if (this->data_ != NULL) KALDI_MEMALIGN_FREE(this->data_);
KALDI_MEMALIGN_FREE(this->data_);
this->data_ = NULL; this->data_ = NULL;
this->dim_ = 0; this->dim_ = 0;
} }
@ -209,7 +216,9 @@ void VectorBase<Real>::Set(Real f) {
if (f == 0) { if (f == 0) {
this->SetZero(); // calls std::memset this->SetZero(); // calls std::memset
} else { } else {
for (MatrixIndexT i = 0; i < dim_; i++) { data_[i] = f; } for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] = f;
}
} }
} }
@ -237,8 +246,7 @@ template<typename OtherReal>
void VectorBase<Real>::CopyRowsFromMat(const MatrixBase<OtherReal> &mat) { void VectorBase<Real>::CopyRowsFromMat(const MatrixBase<OtherReal> &mat) {
KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows()); KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows());
Real *vec_data = data_; Real *vec_data = data_;
const MatrixIndexT cols = mat.NumCols(), const MatrixIndexT cols = mat.NumCols(), rows = mat.NumRows();
rows = mat.NumRows();
for (MatrixIndexT i = 0; i < rows; i++) { for (MatrixIndexT i = 0; i < rows; i++) {
const OtherReal *mat_row = mat.RowData(i); const OtherReal *mat_row = mat.RowData(i);
@ -249,10 +257,8 @@ void VectorBase<Real>::CopyRowsFromMat(const MatrixBase<OtherReal> &mat) {
} }
} }
template template void VectorBase<float>::CopyRowsFromMat(const MatrixBase<double> &mat);
void VectorBase<float>::CopyRowsFromMat(const MatrixBase<double> &mat); template void VectorBase<double>::CopyRowsFromMat(const MatrixBase<float> &mat);
template
void VectorBase<double>::CopyRowsFromMat(const MatrixBase<float> &mat);
template <typename Real> template <typename Real>
@ -260,7 +266,8 @@ void VectorBase<Real>::CopyColsFromMat(const MatrixBase<Real> &mat) {
KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows()); KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows());
Real *inc_data = data_; Real *inc_data = data_;
const MatrixIndexT cols = mat.NumCols(), rows = mat.NumRows(), stride = mat.Stride(); const MatrixIndexT cols = mat.NumCols(), rows = mat.NumRows(),
stride = mat.Stride();
const Real *mat_inc_data = mat.Data(); const Real *mat_inc_data = mat.Data();
for (MatrixIndexT i = 0; i < cols; i++) { for (MatrixIndexT i = 0; i < cols; i++) {
@ -273,7 +280,8 @@ void VectorBase<Real>::CopyColsFromMat(const MatrixBase<Real> &mat) {
} }
template <typename Real> template <typename Real>
void VectorBase<Real>::CopyRowFromMat(const MatrixBase<Real> &mat, MatrixIndexT row) { void VectorBase<Real>::CopyRowFromMat(const MatrixBase<Real> &mat,
MatrixIndexT row) {
KALDI_ASSERT(row < mat.NumRows()); KALDI_ASSERT(row < mat.NumRows());
KALDI_ASSERT(dim_ == mat.NumCols()); KALDI_ASSERT(dim_ == mat.NumCols());
const Real *mat_row = mat.RowData(row); const Real *mat_row = mat.RowData(row);
@ -282,7 +290,8 @@ void VectorBase<Real>::CopyRowFromMat(const MatrixBase<Real> &mat, MatrixIndexT
template <typename Real> template <typename Real>
template <typename OtherReal> template <typename OtherReal>
void VectorBase<Real>::CopyRowFromMat(const MatrixBase<OtherReal> &mat, MatrixIndexT row) { void VectorBase<Real>::CopyRowFromMat(const MatrixBase<OtherReal> &mat,
MatrixIndexT row) {
KALDI_ASSERT(row < mat.NumRows()); KALDI_ASSERT(row < mat.NumRows());
KALDI_ASSERT(dim_ == mat.NumCols()); KALDI_ASSERT(dim_ == mat.NumCols());
const OtherReal *mat_row = mat.RowData(row); const OtherReal *mat_row = mat.RowData(row);
@ -290,15 +299,16 @@ void VectorBase<Real>::CopyRowFromMat(const MatrixBase<OtherReal> &mat, MatrixIn
data_[i] = static_cast<Real>(mat_row[i]); data_[i] = static_cast<Real>(mat_row[i]);
} }
template template void VectorBase<float>::CopyRowFromMat(const MatrixBase<double> &mat,
void VectorBase<float>::CopyRowFromMat(const MatrixBase<double> &mat, MatrixIndexT row); MatrixIndexT row);
template template void VectorBase<double>::CopyRowFromMat(const MatrixBase<float> &mat,
void VectorBase<double>::CopyRowFromMat(const MatrixBase<float> &mat, MatrixIndexT row); MatrixIndexT row);
/* /*
template<typename Real> template<typename Real>
template<typename OtherReal> template<typename OtherReal>
void VectorBase<Real>::CopyRowFromSp(const SpMatrix<OtherReal> &sp, MatrixIndexT row) { void VectorBase<Real>::CopyRowFromSp(const SpMatrix<OtherReal> &sp, MatrixIndexT
row) {
KALDI_ASSERT(row < sp.NumRows()); KALDI_ASSERT(row < sp.NumRows());
KALDI_ASSERT(dim_ == sp.NumCols()); KALDI_ASSERT(dim_ == sp.NumCols());
@ -313,13 +323,17 @@ void VectorBase<Real>::CopyRowFromSp(const SpMatrix<OtherReal> &sp, MatrixIndexT
} }
template template
void VectorBase<float>::CopyRowFromSp(const SpMatrix<double> &mat, MatrixIndexT row); void VectorBase<float>::CopyRowFromSp(const SpMatrix<double> &mat, MatrixIndexT
row);
template template
void VectorBase<double>::CopyRowFromSp(const SpMatrix<float> &mat, MatrixIndexT row); void VectorBase<double>::CopyRowFromSp(const SpMatrix<float> &mat, MatrixIndexT
row);
template template
void VectorBase<float>::CopyRowFromSp(const SpMatrix<float> &mat, MatrixIndexT row); void VectorBase<float>::CopyRowFromSp(const SpMatrix<float> &mat, MatrixIndexT
row);
template template
void VectorBase<double>::CopyRowFromSp(const SpMatrix<double> &mat, MatrixIndexT row); void VectorBase<double>::CopyRowFromSp(const SpMatrix<double> &mat, MatrixIndexT
row);
// takes absolute value of the elements to a power. // takes absolute value of the elements to a power.
// Throws exception if could not (but only for power != 1 and power != 2). // Throws exception if could not (but only for power != 1 and power != 2).
@ -333,7 +347,8 @@ void VectorBase<Real>::ApplyPowAbs(Real power, bool include_sign) {
data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * data_[i] * data_[i]; data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * data_[i] * data_[i];
} else if (power == 0.5) { } else if (power == 0.5) {
for (MatrixIndexT i = 0; i < dim_; i++) { for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * std::sqrt(std::abs(data_[i])); data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) *
std::sqrt(std::abs(data_[i]));
} }
} else if (power < 0.0) { } else if (power < 0.0) {
for (MatrixIndexT i = 0; i < dim_; i++) { for (MatrixIndexT i = 0; i < dim_; i++) {
@ -346,7 +361,8 @@ void VectorBase<Real>::ApplyPowAbs(Real power, bool include_sign) {
} }
} else { } else {
for (MatrixIndexT i = 0; i < dim_; i++) { for (MatrixIndexT i = 0; i < dim_; i++) {
data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * pow(std::abs(data_[i]), power); data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) *
pow(std::abs(data_[i]), power);
if (data_[i] == HUGE_VAL) { // HUGE_VAL is what errno returns on error. if (data_[i] == HUGE_VAL) { // HUGE_VAL is what errno returns on error.
KALDI_ERR << "Could not raise element " << i << "to power " KALDI_ERR << "Could not raise element " << i << "to power "
<< power << ": returned value = " << data_[i]; << power << ": returned value = " << data_[i];
@ -401,7 +417,8 @@ Real VectorBase<Real>::Norm(Real p) const {
} }
template<typename Real> template<typename Real>
bool VectorBase<Real>::ApproxEqual(const VectorBase<Real> &other, float tol) const { bool VectorBase<Real>::ApproxEqual(const VectorBase<Real> &other, float tol)
const {
if (dim_ != other.dim_) KALDI_ERR << "ApproxEqual: size mismatch " if (dim_ != other.dim_) KALDI_ERR << "ApproxEqual: size mismatch "
<< dim_ << " vs. " << other.dim_; << dim_ << " vs. " << other.dim_;
KALDI_ASSERT(tol >= 0.0); KALDI_ASSERT(tol >= 0.0);
@ -501,22 +518,23 @@ Real VectorBase<Real>::Min(MatrixIndexT *index_out) const {
template <typename Real> template <typename Real>
template <typename OtherReal> template <typename OtherReal>
void VectorBase<Real>::CopyColFromMat(const MatrixBase<OtherReal> &mat, MatrixIndexT col) { void VectorBase<Real>::CopyColFromMat(const MatrixBase<OtherReal> &mat,
MatrixIndexT col) {
KALDI_ASSERT(col < mat.NumCols()); KALDI_ASSERT(col < mat.NumCols());
KALDI_ASSERT(dim_ == mat.NumRows()); KALDI_ASSERT(dim_ == mat.NumRows());
for (MatrixIndexT i = 0; i < dim_; i++) for (MatrixIndexT i = 0; i < dim_; i++) data_[i] = mat(i, col);
data_[i] = mat(i, col); // can't do this very efficiently so don't really bother. could improve this
// can't do this very efficiently so don't really bother. could improve this though. // though.
} }
// instantiate the template above. // instantiate the template above.
template template void VectorBase<float>::CopyColFromMat(const MatrixBase<float> &mat,
void VectorBase<float>::CopyColFromMat(const MatrixBase<float> &mat, MatrixIndexT col); MatrixIndexT col);
template template void VectorBase<float>::CopyColFromMat(const MatrixBase<double> &mat,
void VectorBase<float>::CopyColFromMat(const MatrixBase<double> &mat, MatrixIndexT col); MatrixIndexT col);
template template void VectorBase<double>::CopyColFromMat(const MatrixBase<float> &mat,
void VectorBase<double>::CopyColFromMat(const MatrixBase<float> &mat, MatrixIndexT col); MatrixIndexT col);
template template void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat,
void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat, MatrixIndexT col); MatrixIndexT col);
// template<typename Real> // template<typename Real>
// void VectorBase<Real>::CopyDiagFromMat(const MatrixBase<Real> &M) { // void VectorBase<Real>::CopyDiagFromMat(const MatrixBase<Real> &M) {
@ -559,12 +577,14 @@ void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat, MatrixInd
//} //}
// template<typename Real> // template<typename Real>
//void VectorBase<Real>::AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta) { // void VectorBase<Real>::AddRowSumMat(Real alpha, const MatrixBase<Real> &M,
// Real beta) {
// KALDI_ASSERT(dim_ == M.NumCols()); // KALDI_ASSERT(dim_ == M.NumCols());
// MatrixIndexT num_rows = M.NumRows(), stride = M.Stride(), dim = dim_; // MatrixIndexT num_rows = M.NumRows(), stride = M.Stride(), dim = dim_;
// Real *data = data_; // Real *data = data_;
//// implement the function according to a dimension cutoff for computation efficiency //// implement the function according to a dimension cutoff for computation
///efficiency
// if (num_rows <= 64) { // if (num_rows <= 64) {
// cblas_Xscal(dim, beta, data, 1); // cblas_Xscal(dim, beta, data, 1);
// const Real *m_data = M.Data(); // const Real *m_data = M.Data();
@ -579,11 +599,13 @@ void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat, MatrixInd
//} //}
// template<typename Real> // template<typename Real>
//void VectorBase<Real>::AddColSumMat(Real alpha, const MatrixBase<Real> &M, Real beta) { // void VectorBase<Real>::AddColSumMat(Real alpha, const MatrixBase<Real> &M,
// Real beta) {
// KALDI_ASSERT(dim_ == M.NumRows()); // KALDI_ASSERT(dim_ == M.NumRows());
// MatrixIndexT num_cols = M.NumCols(); // MatrixIndexT num_cols = M.NumCols();
//// implement the function according to a dimension cutoff for computation efficiency //// implement the function according to a dimension cutoff for computation
///efficiency
// if (num_cols <= 64) { // if (num_cols <= 64) {
// for (MatrixIndexT i = 0; i < dim_; i++) { // for (MatrixIndexT i = 0; i < dim_; i++) {
// double sum = 0.0; // double sum = 0.0;
@ -657,7 +679,8 @@ void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat, MatrixInd
//} //}
// template<typename Real> // template<typename Real>
//void VectorBase<Real>::Floor(const VectorBase<Real> &v, Real floor_val, MatrixIndexT *floored_count) { // void VectorBase<Real>::Floor(const VectorBase<Real> &v, Real floor_val,
// MatrixIndexT *floored_count) {
// KALDI_ASSERT(dim_ == v.dim_); // KALDI_ASSERT(dim_ == v.dim_);
// if (floored_count == nullptr) { // if (floored_count == nullptr) {
// for (MatrixIndexT i = 0; i < dim_; i++) { // for (MatrixIndexT i = 0; i < dim_; i++) {
@ -678,7 +701,8 @@ void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat, MatrixInd
//} //}
// template<typename Real> // template<typename Real>
//void VectorBase<Real>::Ceiling(const VectorBase<Real> &v, Real ceil_val, MatrixIndexT *ceiled_count) { // void VectorBase<Real>::Ceiling(const VectorBase<Real> &v, Real ceil_val,
// MatrixIndexT *ceiled_count) {
// KALDI_ASSERT(dim_ == v.dim_); // KALDI_ASSERT(dim_ == v.dim_);
// if (ceiled_count == nullptr) { // if (ceiled_count == nullptr) {
// for (MatrixIndexT i = 0; i < dim_; i++) { // for (MatrixIndexT i = 0; i < dim_; i++) {
@ -699,7 +723,8 @@ void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat, MatrixInd
//} //}
// template<typename Real> // template<typename Real>
//MatrixIndexT VectorBase<Real>::ApplyFloor(const VectorBase<Real> &floor_vec) { // MatrixIndexT VectorBase<Real>::ApplyFloor(const VectorBase<Real> &floor_vec)
// {
// KALDI_ASSERT(floor_vec.Dim() == dim_); // KALDI_ASSERT(floor_vec.Dim() == dim_);
// MatrixIndexT num_floored = 0; // MatrixIndexT num_floored = 0;
// for (MatrixIndexT i = 0; i < dim_; i++) { // for (MatrixIndexT i = 0; i < dim_; i++) {
@ -820,7 +845,8 @@ void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat, MatrixInd
//} //}
//} //}
//template<typename Real> // Set each element to y = (x == orig ? changed : x). // template<typename Real> // Set each element to y = (x == orig ? changed :
// x).
// void VectorBase<Real>::ReplaceValue(Real orig, Real changed) { // void VectorBase<Real>::ReplaceValue(Real orig, Real changed) {
// Real *data = data_; // Real *data = data_;
// for (MatrixIndexT i = 0; i < dim_; i++) // for (MatrixIndexT i = 0; i < dim_; i++)
@ -889,7 +915,8 @@ void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat, MatrixInd
// template<typename Real> // template<typename Real>
// template<typename OtherReal> // template<typename OtherReal>
//void VectorBase<Real>::AddVec(const Real alpha, const VectorBase<OtherReal> &v) { // void VectorBase<Real>::AddVec(const Real alpha, const VectorBase<OtherReal>
// &v) {
// KALDI_ASSERT(dim_ == v.dim_); // KALDI_ASSERT(dim_ == v.dim_);
//// remove __restrict__ if it causes compilation problems. //// remove __restrict__ if it causes compilation problems.
// Real *__restrict__ data = data_; // Real *__restrict__ data = data_;
@ -904,13 +931,16 @@ void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat, MatrixInd
//} //}
// template // template
//void VectorBase<float>::AddVec(const float alpha, const VectorBase<double> &v); // void VectorBase<float>::AddVec(const float alpha, const VectorBase<double>
// &v);
// template // template
//void VectorBase<double>::AddVec(const double alpha, const VectorBase<float> &v); // void VectorBase<double>::AddVec(const double alpha, const VectorBase<float>
// &v);
// template<typename Real> // template<typename Real>
// template<typename OtherReal> // template<typename OtherReal>
//void VectorBase<Real>::AddVec2(const Real alpha, const VectorBase<OtherReal> &v) { // void VectorBase<Real>::AddVec2(const Real alpha, const VectorBase<OtherReal>
// &v) {
// KALDI_ASSERT(dim_ == v.dim_); // KALDI_ASSERT(dim_ == v.dim_);
//// remove __restrict__ if it causes compilation problems. //// remove __restrict__ if it causes compilation problems.
// Real *__restrict__ data = data_; // Real *__restrict__ data = data_;
@ -925,9 +955,11 @@ void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat, MatrixInd
//} //}
// template // template
//void VectorBase<float>::AddVec2(const float alpha, const VectorBase<double> &v); // void VectorBase<float>::AddVec2(const float alpha, const VectorBase<double>
// &v);
// template // template
//void VectorBase<double>::AddVec2(const double alpha, const VectorBase<float> &v); // void VectorBase<double>::AddVec2(const double alpha, const VectorBase<float>
// &v);
template <typename Real> template <typename Real>
@ -937,8 +969,8 @@ void VectorBase<Real>::Read(std::istream &is, bool binary) {
Vector<Real> tmp; Vector<Real> tmp;
tmp.Read(is, binary); tmp.Read(is, binary);
if (tmp.Dim() != Dim()) if (tmp.Dim() != Dim())
KALDI_ERR << "VectorBase<Real>::Read, size mismatch " KALDI_ERR << "VectorBase<Real>::Read, size mismatch " << Dim()
<< Dim() << " vs. " << tmp.Dim(); << " vs. " << tmp.Dim();
CopyFromVec(tmp); CopyFromVec(tmp);
} }
@ -952,8 +984,13 @@ void Vector<Real>::Read(std::istream &is, bool binary) {
int peekval = Peek(is, binary); int peekval = Peek(is, binary);
const char *my_token = (sizeof(Real) == 4 ? "FV" : "DV"); const char *my_token = (sizeof(Real) == 4 ? "FV" : "DV");
char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F'); char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F');
if (peekval == other_token_start) { // need to instantiate the other type to read it. if (peekval == other_token_start) { // need to instantiate the other
typedef typename OtherReal<Real>::Real OtherType; // if Real == float, OtherType == double, and vice versa. // type to read it.
typedef typename OtherReal<Real>::Real OtherType; // if Real ==
// float,
// OtherType ==
// double, and
// vice versa.
Vector<OtherType> other(this->Dim()); Vector<OtherType> other(this->Dim());
other.Read(is, binary); // add is false at this point. other.Read(is, binary); // add is false at this point.
if (this->Dim() != other.Dim()) this->Resize(other.Dim()); if (this->Dim() != other.Dim()) this->Resize(other.Dim());
@ -964,7 +1001,8 @@ void Vector<Real>::Read(std::istream &is, bool binary) {
ReadToken(is, binary, &token); ReadToken(is, binary, &token);
if (token != my_token) { if (token != my_token) {
if (token.length() > 20) token = token.substr(0, 17) + "..."; if (token.length() > 20) token = token.substr(0, 17) + "...";
specific_error << ": Expected token " << my_token << ", got " << token; specific_error << ": Expected token " << my_token << ", got "
<< token;
goto bad; goto bad;
} }
int32 size; int32 size;
@ -973,20 +1011,29 @@ void Vector<Real>::Read(std::istream &is, bool binary) {
if (size > 0) if (size > 0)
is.read(reinterpret_cast<char *>(this->data_), sizeof(Real) * size); is.read(reinterpret_cast<char *>(this->data_), sizeof(Real) * size);
if (is.fail()) { if (is.fail()) {
specific_error << "Error reading vector data (binary mode); truncated " specific_error
"stream? (size = " << size << ")"; << "Error reading vector data (binary mode); truncated "
"stream? (size = "
<< size << ")";
goto bad; goto bad;
} }
return; return;
} else { // Text mode reading; format is " [ 1.1 2.0 3.4 ]\n" } else { // Text mode reading; format is " [ 1.1 2.0 3.4 ]\n"
std::string s; std::string s;
is >> s; is >> s;
// if ((s.compare("DV") == 0) || (s.compare("FV") == 0)) { // Back compatibility. // if ((s.compare("DV") == 0) || (s.compare("FV") == 0)) { // Back
// compatibility.
// is >> s; // get dimension // is >> s; // get dimension
// is >> s; // get "[" // is >> s; // get "["
// } // }
if (is.fail()) { specific_error << "EOF while trying to read vector."; goto bad; } if (is.fail()) {
if (s.compare("[]") == 0) { Resize(0); return; } // tolerate this variant. specific_error << "EOF while trying to read vector.";
goto bad;
}
if (s.compare("[]") == 0) {
Resize(0);
return;
} // tolerate this variant.
if (s.compare("[")) { if (s.compare("[")) {
if (s.length() > 20) s = s.substr(0, 17) + "..."; if (s.length() > 20) s = s.substr(0, 17) + "...";
specific_error << "Expected \"[\" but got " << s; specific_error << "Expected \"[\" but got " << s;
@ -998,12 +1045,17 @@ void Vector<Real>::Read(std::istream &is, bool binary) {
if (i == '-' || (i >= '0' && i <= '9')) { // common cases first. if (i == '-' || (i >= '0' && i <= '9')) { // common cases first.
Real r; Real r;
is >> r; is >> r;
if (is.fail()) { specific_error << "Failed to read number."; goto bad; } if (is.fail()) {
specific_error << "Failed to read number.";
goto bad;
}
if (!std::isspace(is.peek()) && is.peek() != ']') { if (!std::isspace(is.peek()) && is.peek() != ']') {
specific_error << "Expected whitespace after number."; goto bad; specific_error << "Expected whitespace after number.";
goto bad;
} }
data.push_back(r); data.push_back(r);
// But don't eat whitespace... we want to check that it's not newlines // But don't eat whitespace... we want to check that it's not
// newlines
// which would be valid only for a matrix. // which would be valid only for a matrix.
} else if (i == ' ' || i == '\t') { } else if (i == ' ' || i == '\t') {
is.get(); is.get();
@ -1016,7 +1068,9 @@ void Vector<Real>::Read(std::istream &is, bool binary) {
if (static_cast<char>(i) == '\r') { if (static_cast<char>(i) == '\r') {
is.get(); is.get();
is.get(); // get \r\n (must eat what we wrote) is.get(); // get \r\n (must eat what we wrote)
} else if (static_cast<char>(i) == '\n') { is.get(); } // get \n (must eat what we wrote) } else if (static_cast<char>(i) == '\n') {
is.get();
} // get \n (must eat what we wrote)
if (is.fail()) { if (is.fail()) {
KALDI_WARN << "After end of vector data, read error."; KALDI_WARN << "After end of vector data, read error.";
// we got the data we needed, so just warn for this error. // we got the data we needed, so just warn for this error.
@ -1026,7 +1080,8 @@ void Vector<Real>::Read(std::istream &is, bool binary) {
specific_error << "EOF while reading vector data."; specific_error << "EOF while reading vector data.";
goto bad; goto bad;
} else if (i == '\n' || i == '\r') { } else if (i == '\n' || i == '\r') {
specific_error << "Newline found while reading vector (maybe it's a matrix?)"; specific_error << "Newline found while reading vector (maybe "
"it's a matrix?)";
goto bad; goto bad;
} else { } else {
is >> s; // read string. is >> s; // read string.
@ -1039,7 +1094,8 @@ void Vector<Real>::Read(std::istream &is, bool binary) {
KALDI_WARN << "Reading NaN value into vector."; KALDI_WARN << "Reading NaN value into vector.";
} else { } else {
if (s.length() > 20) s = s.substr(0, 17) + "..."; if (s.length() > 20) s = s.substr(0, 17) + "...";
specific_error << "Expecting numeric vector data, got " << s; specific_error << "Expecting numeric vector data, got "
<< s;
goto bad; goto bad;
} }
} }
@ -1048,8 +1104,8 @@ void Vector<Real>::Read(std::istream &is, bool binary) {
// we never reach this line (the while loop returns directly). // we never reach this line (the while loop returns directly).
bad: bad:
KALDI_ERR << "Failed to read vector from stream. " << specific_error.str() KALDI_ERR << "Failed to read vector from stream. " << specific_error.str()
<< " File position at start is " << " File position at start is " << pos_at_start << ", currently "
<< pos_at_start<<", currently "<<is.tellg(); << is.tellg();
} }
@ -1068,12 +1124,10 @@ void VectorBase<Real>::Write(std::ostream & os, bool binary) const {
os.write(reinterpret_cast<const char *>(Data()), sizeof(Real) * size); os.write(reinterpret_cast<const char *>(Data()), sizeof(Real) * size);
} else { } else {
os << " [ "; os << " [ ";
for (MatrixIndexT i = 0; i < Dim(); i++) for (MatrixIndexT i = 0; i < Dim(); i++) os << (*this)(i) << " ";
os << (*this)(i) << " ";
os << "]\n"; os << "]\n";
} }
if (!os.good()) if (!os.good()) KALDI_ERR << "Failed to write vector to stream";
KALDI_ERR << "Failed to write vector to stream";
} }
@ -1167,8 +1221,10 @@ void Vector<Real>::Swap(Vector<Real> *other) {
// Real *data = this->data_; // Real *data = this->data_;
// const Real *Mdata = M.Data(), *Ndata = N.Data(); // const Real *Mdata = M.Data(), *Ndata = N.Data();
//for (MatrixIndexT i = 0; i < dim; i++, Mdata += M_row_stride, Ndata += N_col_stride, data++) { // for (MatrixIndexT i = 0; i < dim; i++, Mdata += M_row_stride, Ndata +=
//*data = beta * *data + alpha * cblas_Xdot(M_col_dim, Mdata, M_col_stride, Ndata, N_row_stride); // N_col_stride, data++) {
//*data = beta * *data + alpha * cblas_Xdot(M_col_dim, Mdata, M_col_stride,
//Ndata, N_row_stride);
//} //}
//} //}

@ -129,8 +129,10 @@ class VectorBase {
friend class VectorBase<double>; friend class VectorBase<double>;
friend class VectorBase<float>; friend class VectorBase<float>;
protected: protected:
/// Destructor; does not deallocate memory, this is handled by child classes. /// Destructor; does not deallocate memory, this is handled by child
/// classes.
/// This destructor is protected so this object can only be /// This destructor is protected so this object can only be
/// deleted via a child. /// deleted via a child.
~VectorBase() {} ~VectorBase() {}
@ -161,7 +163,9 @@ class Vector: public VectorBase<Real> {
/// if set_zero == false, memory contents are undefined. /// if set_zero == false, memory contents are undefined.
explicit Vector(const MatrixIndexT s, explicit Vector(const MatrixIndexT s,
MatrixResizeType resize_type = kSetZero) MatrixResizeType resize_type = kSetZero)
: VectorBase<Real>() { Resize(s, resize_type); } : VectorBase<Real>() {
Resize(s, resize_type);
}
/// Copy constructor from CUDA vector /// Copy constructor from CUDA vector
/// This is defined in ../cudamatrix/cu-vector.h /// This is defined in ../cudamatrix/cu-vector.h
@ -169,7 +173,8 @@ class Vector: public VectorBase<Real> {
// explicit Vector(const CuVectorBase<OtherReal> &cu); // explicit Vector(const CuVectorBase<OtherReal> &cu);
/// Copy constructor. The need for this is controversial. /// Copy constructor. The need for this is controversial.
Vector(const Vector<Real> &v) : VectorBase<Real>() { // (cannot be explicit) Vector(const Vector<Real> &v)
: VectorBase<Real>() { // (cannot be explicit)
Resize(v.Dim(), kUndefined); Resize(v.Dim(), kUndefined);
this->CopyFromVec(v); this->CopyFromVec(v);
} }
@ -231,16 +236,18 @@ class Vector: public VectorBase<Real> {
this->CopyFromVec(other); this->CopyFromVec(other);
return *this; return *this;
} }
private: private:
/// Init assumes the current contents of the class are invalid (i.e. junk or /// Init assumes the current contents of the class are invalid (i.e. junk or
/// has already been freed), and it sets the vector to newly allocated memory /// has already been freed), and it sets the vector to newly allocated
/// with the specified dimension. dim == 0 is acceptable. The memory contents /// memory
/// with the specified dimension. dim == 0 is acceptable. The memory
/// contents
/// pointed to by data_ will be undefined. /// pointed to by data_ will be undefined.
void Init(const MatrixIndexT dim); void Init(const MatrixIndexT dim);
/// Destroy function, called internally. /// Destroy function, called internally.
void Destroy(); void Destroy();
}; };
@ -252,8 +259,10 @@ class SubVector : public VectorBase<Real> {
/// Constructor from a Vector or SubVector. /// Constructor from a Vector or SubVector.
/// SubVectors are not const-safe and it's very hard to make them /// SubVectors are not const-safe and it's very hard to make them
/// so for now we just give up. This function contains const_cast. /// so for now we just give up. This function contains const_cast.
SubVector(const VectorBase<Real> &t, const MatrixIndexT origin, SubVector(const VectorBase<Real> &t,
const MatrixIndexT length) : VectorBase<Real>() { const MatrixIndexT origin,
const MatrixIndexT length)
: VectorBase<Real>() {
// following assert equiv to origin>=0 && length>=0 && // following assert equiv to origin>=0 && length>=0 &&
// origin+length <= rt.dim_ // origin+length <= rt.dim_
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin) + KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin) +
@ -334,12 +343,10 @@ std::istream & operator >> (std::istream & in, Vector<Real> & v);
//} //}
} // namespace kaldi } // namespace kaldi
// we need to include the implementation // we need to include the implementation
#include "matrix/kaldi-vector-inl.h" #include "matrix/kaldi-vector-inl.h"
#endif // KALDI_MATRIX_KALDI_VECTOR_H_ #endif // KALDI_MATRIX_KALDI_VECTOR_H_

@ -27,18 +27,15 @@
namespace kaldi { namespace kaldi {
// this enums equal to CblasTrans and CblasNoTrans constants from CBLAS library // this enums equal to CblasTrans and CblasNoTrans constants from CBLAS library
// we are writing them as literals because we don't want to include here matrix/kaldi-blas.h, // we are writing them as literals because we don't want to include here
// matrix/kaldi-blas.h,
// which puts many symbols into global scope (like "real") via the header f2c.h // which puts many symbols into global scope (like "real") via the header f2c.h
typedef enum { typedef enum {
kTrans = 112, // = CblasTrans kTrans = 112, // = CblasTrans
kNoTrans = 111 // = CblasNoTrans kNoTrans = 111 // = CblasNoTrans
} MatrixTransposeType; } MatrixTransposeType;
typedef enum { typedef enum { kSetZero, kUndefined, kCopyData } MatrixResizeType;
kSetZero,
kUndefined,
kCopyData
} MatrixResizeType;
typedef enum { typedef enum {
@ -53,24 +50,33 @@ typedef enum {
kTakeMeanAndCheck kTakeMeanAndCheck
} SpCopyType; } SpCopyType;
template<typename Real> class VectorBase; template <typename Real>
template<typename Real> class Vector; class VectorBase;
template<typename Real> class SubVector; template <typename Real>
template<typename Real> class MatrixBase; class Vector;
template<typename Real> class SubMatrix; template <typename Real>
template<typename Real> class Matrix; class SubVector;
template <typename Real>
class MatrixBase;
template <typename Real>
class SubMatrix;
template <typename Real>
class Matrix;
/// This class provides a way for switching between double and float types. /// This class provides a way for switching between double and float types.
template<typename T> class OtherReal { }; // useful in reading+writing routines template <typename T>
class OtherReal {}; // useful in reading+writing routines
// to switch double and float. // to switch double and float.
/// A specialized class for switching from float to double. /// A specialized class for switching from float to double.
template<> class OtherReal<float> { template <>
class OtherReal<float> {
public: public:
typedef double Real; typedef double Real;
}; };
/// A specialized class for switching from double to float. /// A specialized class for switching from double to float.
template<> class OtherReal<double> { template <>
class OtherReal<double> {
public: public:
typedef float Real; typedef float Real;
}; };
@ -84,9 +90,7 @@ typedef uint32 UnsignedMatrixIndexT;
// typedef size_t MatrixIndexT; // typedef size_t MatrixIndexT;
// typedef ssize_t SignedMatrixIndexT; // typedef ssize_t SignedMatrixIndexT;
// typedef size_t UnsignedMatrixIndexT; // typedef size_t UnsignedMatrixIndexT;
} }
#endif // KALDI_MATRIX_MATRIX_COMMON_H_ #endif // KALDI_MATRIX_MATRIX_COMMON_H_

@ -1,10 +1,10 @@
project(kaldi)
include_directories( include_directories(
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
) )
add_subdirectory(base) add_subdirectory(base)
add_subdirectory(util) add_subdirectory(util)
if(WITH_ASR)
add_subdirectory(lat) add_subdirectory(lat)
add_subdirectory(fstext) add_subdirectory(fstext)
add_subdirectory(decoder) add_subdirectory(decoder)
@ -12,3 +12,4 @@ add_subdirectory(lm)
add_subdirectory(fstbin) add_subdirectory(fstbin)
add_subdirectory(lmbin) add_subdirectory(lmbin)
endif()

@ -44,7 +44,19 @@ typedef float BaseFloat;
#ifndef COMPILE_WITHOUT_OPENFST #ifndef COMPILE_WITHOUT_OPENFST
#ifdef WITH_ASR
#include <fst/types.h> #include <fst/types.h>
#else
using int8 = int8_t;
using int16 = int16_t;
using int32 = int32_t;
using int64 = int64_t;
using uint8 = uint8_t;
using uint16 = uint16_t;
using uint32 = uint32_t;
using uint64 = uint64_t;
#endif
namespace kaldi { namespace kaldi {
using ::int16; using ::int16;

@ -0,0 +1,18 @@
# Build rules for the Silero VAD ONNX demo executable.
#
# NOTE(review): FASTDEPLOY_LIBS is not defined in this file; it is presumably
# provided by an enclosing CMakeLists.txt that has already included
# FastDeploy.cmake -- confirm. The commented block below is the stand-alone
# FastDeploy setup, kept for reference only.

# set(CMAKE_CXX_STANDARD 11)
# # fastdeploy
# set(FASTDEPLOY_INSTALL_DIR "fdlib/fastdeploy-linux-x64-1.0.4" CACHE STRING force)
# if(NOT EXISTS ${FASTDEPLOY_INSTALL_DIR})
# message(FATAL_ERROR "Please using cmake -B build -DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR}")
# endif()
# include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# # FastDeploy
# include_directories(${FASTDEPLOY_INCS})

add_executable(infer_onnx_silero_vad
  ${CMAKE_CURRENT_SOURCE_DIR}/infer_onnx_silero_vad.cc
  wav.h
  vad.cc
  vad.h
)

# FastDeploy: link with an explicit visibility keyword (an executable has no
# consumers, so PRIVATE is equivalent to the legacy keyword-less form).
target_link_libraries(infer_onnx_silero_vad PRIVATE ${FASTDEPLOY_LIBS})

@ -0,0 +1,121 @@
English | [简体中文](README_CN.md)
# Silero VAD Deployment Example
This directory provides the `infer_onnx_silero_vad` example, which quickly deploys the Silero VAD model on CPU/GPU.
Before deployment, two steps require confirmation.
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following command in this directory.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the VAD model file and test audio. After decompression, place the model and test audio in the same directory as infer_onnx_silero_vad.cc
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
# inference
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```
The above commands work on Linux and macOS. For using the SDK on Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md)
## VAD C++ Interface
### Vad Class
```c++
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```
**Parameter**
> * **model_file**(str): Model file path
> * **custom_option**(RuntimeOption): Backend inference configuration. Defaults to `fastdeploy::RuntimeOption()`, i.e. the default configuration
### setAudioCofig function
**Must be called before the `init` function**
```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```
**Parameter**
> * **sr**(int): sampling rate
> * **frame_ms**(int): The length of each detection frame, and it is used to calculate the detection window size
> * **threshold**(float): Result probability judgment threshold
> * **min_silence_duration_ms**(int): The threshold used to calculate whether it is silence
> * **speech_pad_ms**(int): Used to calculate the end time of the speech
### init function
Used to initialize audio-related parameters.
```c++
void Vad::init();
```
### loadAudio function
Load audio.
```c++
void Vad::loadAudio(const std::string& wavPath)
```
**Parameter**
> * **wavPath**(str): Audio file path
### Predict function
Used to run model inference.
```c++
bool Vad::Predict();
```
### getResult function
**Used to obtain inference results**
```c++
std::vector<std::map<std::string, float>> Vad::getResult(
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
float mergeThreshold = 0.3);
```
**Parameter**
> * **removeThreshold**(float): Discard result fragment threshold; If some recognition results are too short, they will be discarded according to this threshold
> * **expandHeadThreshold**(float): Offset at the beginning of the segment; The recognized start time may be too close to the voice part, so move forward the start time accordingly
> * **expandTailThreshold**(float): Offset at the end of the segment; The recognized end time may be too close to the voice part, so the end time is moved back accordingly
> * **mergeThreshold**(float): Some result segments are very close and can be combined into one, and the vocal segments can be combined accordingly
**The output result format is**`std::vector<std::map<std::string, float>>`
> Output a list, each element is a speech fragment
>
> Each clip can use 'start' to get the start time and 'end' to get the end time
### Tips
1. The `setAudioCofig` function must be called before the `init` function
2. The sampling rate of the input audio file must be consistent with that set in the code
- [Model Description](../)
- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md)

@ -0,0 +1,119 @@
[English](README.md) | 简体中文
# Silero VAD 部署示例
本目录下提供`infer_onnx_silero_vad`，用于快速完成 Silero VAD 模型在 CPU/GPU 上的部署。
在部署前,需确认以下两个步骤
- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. 根据开发环境，下载预编译部署库和samples代码，参考[FastDeploy预编译库](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
以Linux上 VAD 推理为例,在本目录执行如下命令即可完成编译测试。
```bash
mkdir build
cd build
# 下载FastDeploy预编译库，用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# 下载 VAD 模型文件和测试音频,解压后将模型和测试音频放置在与 infer_onnx_silero_vad.cc 同级目录下
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
# 推理
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```
以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考:
- [如何在Windows中使用FastDeploy C++ SDK](../../../../docs/cn/faq/use_sdk_on_windows.md)
## VAD C++ 接口
### Vad 类
```c++
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```
**参数**
> * **model_file**(str): 模型文件路径
> * **custom_option**(RuntimeOption): 后端推理配置，默认为 `fastdeploy::RuntimeOption()`，即采用默认配置
### setAudioCofig 函数
**必须在`init`函数前调用**
```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```
**参数**
> * **sr**(int): 采样率
> * **frame_ms**(int): 每次检测帧长,用于计算检测窗口大小
> * **threshold**(float): 结果概率判断阈值
> * **min_silence_duration_ms**(int): 用于计算判断是否是 silence 的阈值
> * **speech_pad_ms**(int): 用于计算 speech 结束时刻
### init 函数
用于初始化音频相关参数
```c++
void Vad::init();
```
### loadAudio 函数
加载音频
```c++
void Vad::loadAudio(const std::string& wavPath)
```
**参数**
> * **wavPath**(str): 音频文件路径
### Predict 函数
用于开始模型推理
```c++
bool Vad::Predict();
```
### getResult 函数
**用于获取推理结果**
```c++
std::vector<std::map<std::string, float>> Vad::getResult(
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
float mergeThreshold = 0.3);
```
**参数**
> * **removeThreshold**(float): 丢弃结果片段阈值;部分识别结果太短则根据此阈值丢弃
> * **expandHeadThreshold**(float): 结果片段开始时刻偏移;识别到的开始时刻可能过于贴近发声部分,因此据此前移开始时刻
> * **expandTailThreshold**(float): 结果片段结束时刻偏移;识别到的结束时刻可能过于贴近发声部分,因此据此后移结束时刻
> * **mergeThreshold**(float): 有的结果片段十分靠近,可以合并成一个,据此合并发声片段
**输出结果格式为**`std::vector<std::map<std::string, float>>`
> 输出一个列表,每个元素是一个讲话片段
>
> 每个片段可以用 'start' 获取到开始时刻,用 'end' 获取到结束时刻
### 提示
1. `setAudioCofig`函数必须在`init`函数前调用
2. 输入的音频文件的采样率必须与代码中设置的保持一致
- [模型介绍](../)
- [如何切换模型推理后端引擎](../../../../docs/cn/faq/how_to_change_backend.md)

@ -0,0 +1,65 @@
#include "vad.h"
// Command-line demo for the Silero VAD wrapper.
//
// Usage: infer_onnx_silero_vad <model> <audio>
//
// Loads the WAV file, feeds it to the model one window at a time, prints the
// per-window state followed by the detected speech segments.
//
// Returns 0 on success and -1 on bad arguments, a sample-rate mismatch or an
// inference failure.
int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cout << "Usage: infer_onnx_silero_vad path/to/model path/to/audio "
                     "run_option, "
                     "e.g ./infer_onnx_silero_vad silero_vad.onnx sample.wav"
                  << std::endl;
        return -1;
    }

    std::string model_file = argv[1];
    std::string audio_file = argv[2];

    int sr = 16000;
    Vad vad(model_file);
    // custom config, but must be set before init
    vad.SetConfig(sr, 32, 0.45f, 200, 0, 0);
    vad.Init();

    wav::WavReader wav_reader = wav::WavReader(audio_file);
    // Checked at run time (not with assert) so the mismatch is still caught
    // when the program is built with NDEBUG.
    if (wav_reader.sample_rate() != sr) {
        std::cerr << "Sample rate mismatch: expected " << sr << ", got "
                  << wav_reader.sample_rate() << "." << std::endl;
        return -1;
    }

    // Samples scaled by 1/32768 (normalised if the reader yields 16-bit PCM
    // values -- TODO confirm). The float divisor avoids integer division if
    // the reader ever returns an integral sample type.
    auto num_samples = wav_reader.num_samples();
    std::vector<float> inputWav;
    inputWav.resize(num_samples);
    for (int i = 0; i < num_samples; i++) {
        inputWav[i] = wav_reader.data()[i] / 32768.0f;
    }

    int window_size_samples = vad.WindowSizeSamples();
    for (int64_t j = 0; j < num_samples; j += window_size_samples) {
        // The last window may be shorter than window_size_samples.
        auto start = j;
        auto end = start + window_size_samples >= num_samples
                       ? num_samples
                       : start + window_size_samples;

        std::vector<float> r{&inputWav[0] + start, &inputWav[0] + end};

        if (!vad.ForwardChunk(r)) {
            std::cerr << "Failed to inference while using model:"
                      << vad.ModelName() << "." << std::endl;
            // `return false` here would be exit status 0, i.e. success.
            return -1;
        }

        Vad::State s = vad.Postprocess();
        std::cout << s << " ";
    }
    std::cout << std::endl;

    std::vector<std::map<std::string, float>> result = vad.GetResult();
    for (auto& res : result) {
        std::cout << "speak start: " << res["start"]
                  << " s, end: " << res["end"] << " s | ";
    }
    std::cout << "\b\b " << std::endl;

    vad.Reset();

    return 0;
}

@ -0,0 +1,306 @@
// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "vad.h"
#include <cstring>
#include <iomanip>
// LOG_DEBUG: stream-style debug logger built on fastdeploy::FDLogger; every
// message is prefixed with "[DEBUG]", the file, the line and the function.
// NOTE(review): the first FDLogger argument is `true` when NDEBUG is defined
// and `false` otherwise. If that argument enables output, debug messages are
// emitted only in NDEBUG (release-style) builds and suppressed in debug
// builds, which looks inverted -- confirm the intended polarity.
#ifdef NDEBUG
#define LOG_DEBUG \
::fastdeploy::FDLogger(true, "[DEBUG]") << __REL_FILE__ << "(" << __LINE__ \
<< ")::" << __FUNCTION__ << "\t"
#else
#define LOG_DEBUG \
::fastdeploy::FDLogger(false, "[DEBUG]") \
<< __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t"
#endif
// Builds a VAD model wrapper for the ONNX file at `model_file`.
//
// The runtime option starts as a copy of `custom_option` and is then forced
// to CPU execution on the ONNX Runtime backend with ONNX model format, so
// those choices in `custom_option` are overridden.
Vad::Vad(const std::string& model_file,
         const fastdeploy::RuntimeOption&
             custom_option /* = fastdeploy::RuntimeOption() */) {
    // Backends this model accepts.
    valid_cpu_backends = {fastdeploy::Backend::ORT,
                          fastdeploy::Backend::OPENVINO};
    valid_gpu_backends = {fastdeploy::Backend::ORT, fastdeploy::Backend::TRT};

    // Caller's option first, then the fixed CPU + ORT setup.
    runtime_option = custom_option;
    runtime_option.UseCpu();
    runtime_option.UseOrtBackend();
    runtime_option.model_format = fastdeploy::ModelFormat::ONNX;
    runtime_option.model_file = model_file;

    auto& ort_settings = runtime_option.ort_option;
    // Graph optimization level (99 presumably selects all ONNX Runtime
    // optimizations -- confirm against the ORT option documentation).
    ort_settings.graph_optimization_level = 99;
    // Single-threaded execution.
    ort_settings.intra_op_num_threads = 1;
    ort_settings.inter_op_num_threads = 1;
}
void Vad::Init() {
std::call_once(init_, [&]() { initialized = Initialize(); });
}
// Fixed name reported for this model.
std::string Vad::ModelName() const {
    return std::string("VAD");
}
// Stores the audio/detection configuration and derives the sample-based
// quantities from it.
//
// sr                       sampling rate in Hz
// frame_ms                 length of one detection window in milliseconds
// threshold                speech probability threshold
// min_silence_duration_ms  silence length that ends a speech segment
// speech_pad_left_ms       padding applied before a detected start
// speech_pad_right_ms      padding applied after a detected end
//
// Throws std::runtime_error when called after the model was initialized.
void Vad::SetConfig(int sr,
                    int frame_ms,
                    float threshold,
                    int min_silence_duration_ms,
                    int speech_pad_left_ms,
                    int speech_pad_right_ms) {
    // Reconfiguring an initialized model is rejected.
    if (initialized) {
        fastdeploy::FDERROR << "SetConfig must be called before init"
                            << std::endl;
        throw std::runtime_error("SetConfig must be called before init");
    }

    // Values taken over as given.
    threshold_ = threshold;
    frame_ms_ = frame_ms;
    sample_rate_ = sr;

    // Samples per millisecond (integer division).
    sr_per_ms_ = sr / 1000;

    // Millisecond settings converted to sample counts.
    speech_pad_right_samples_ = speech_pad_right_ms * sr_per_ms_;
    speech_pad_left_samples_ = speech_pad_left_ms * sr_per_ms_;
    min_silence_samples_ = min_silence_duration_ms * sr_per_ms_;

    // The chunk size starts out as one full window.
    window_size_samples_ = frame_ms * sr_per_ms_;
    current_chunk_size_ = window_size_samples_;

    fastdeploy::FDINFO << "sr=" << sr << " threshold=" << threshold
                       << " frame_ms=" << frame_ms
                       << " min_silence_duration_ms=" << min_silence_duration_ms
                       << " speech_pad_left_ms=" << speech_pad_left_ms
                       << " speech_pad_right_ms=" << speech_pad_right_ms;
}
// Clears the recurrent state buffers and all detection bookkeeping so the
// object can process a new stream.
void Vad::Reset() {
    // std::memset takes an int fill value that is written byte-wise; pass the
    // integer 0 rather than a float literal. All-zero bytes represent 0.0f
    // for IEEE-754 floats.
    std::memset(h_.data(), 0, h_.size() * sizeof(float));
    std::memset(c_.data(), 0, c_.size() * sizeof(float));

    triggerd_ = false;
    temp_end_ = 0;
    current_sample_ = 0;

    speakStart_.clear();
    speakEnd_.clear();

    states_.clear();
}
// Allocates the tensor holders and state buffers, resets the detection state
// and starts the FastDeploy runtime.
//
// Returns true when InitRuntime() succeeded, false otherwise.
bool Vad::Initialize() {
    // Holders for the model outputs (3) and inputs (4).
    outputTensors_.resize(3);
    inputTensors_.resize(4);

    // Audio input shape: {1, window_size_samples_}.
    input_node_dims_.emplace_back(1);
    input_node_dims_.emplace_back(window_size_samples_);

    // Single-element buffer carrying the sampling rate.
    sr_.resize(1);
    sr_[0] = sample_rate_;

    // Hidden state buffers, zeroed by Reset().
    c_.resize(size_hc_);
    h_.resize(size_hc_);
    Reset();

    // Start the runtime; report and bail out on failure.
    const bool runtime_ready = InitRuntime();
    if (runtime_ready) {
        fastdeploy::FDINFO << "init done.";
        return true;
    }

    fastdeploy::FDERROR << "Failed to initialize fastdeploy backend."
                        << std::endl;
    return false;
}
bool Vad::ForwardChunk(std::vector<float>& chunk) {
// last chunk may not be window_size_samples_
input_node_dims_.back() = chunk.size();
assert(window_size_samples_ >= chunk.size());
current_chunk_size_ = chunk.size();
inputTensors_[0].name = "input";
inputTensors_[0].SetExternalData(
input_node_dims_, fastdeploy::FDDataType::FP32, chunk.data());
inputTensors_[1].name = "sr";
inputTensors_[1].SetExternalData(
sr_node_dims_, fastdeploy::FDDataType::INT64, sr_.data());
inputTensors_[2].name = "h";
inputTensors_[2].SetExternalData(
hc_node_dims_, fastdeploy::FDDataType::FP32, h_.data());
inputTensors_[3].name = "c";
inputTensors_[3].SetExternalData(
hc_node_dims_, fastdeploy::FDDataType::FP32, c_.data());
if (!Infer(inputTensors_, &outputTensors_)) {
return false;
}
// Push forward sample index
current_sample_ += current_chunk_size_;
return true;
}
// Consumes the outputs of the latest ForwardChunk() call: copies the new
// recurrent state into h_/c_, classifies the chunk and records the result.
//
// Classification uses a hysteresis of 0.15 below threshold_: speech starts
// when the probability reaches threshold_ and is only considered for ending
// once it drops below threshold_ - 0.15 for at least min_silence_samples_.
//
// Exactly one state is appended to states_ per call (a probability that
// compares false against everything, e.g. NaN, is treated as the low-
// probability case), and a reference to that state is returned.
const Vad::State& Vad::Postprocess() {
    // update prob, h, c
    outputProb_ = *static_cast<const float*>(outputTensors_[0].Data());
    auto* hn = static_cast<float*>(outputTensors_[1].MutableData());
    std::memcpy(h_.data(), hn, h_.size() * sizeof(float));
    auto* cn = static_cast<float*>(outputTensors_[2].MutableData());
    std::memcpy(c_.data(), cn, c_.size() * sizeof(float));

    if (!triggerd_) {
        if (outputProb_ >= threshold_) {
            // 2. Start
            triggerd_ = true;
            // Computed in signed 64-bit and clamped at 0: the counters are
            // unsigned, so subtracting a left padding larger than the chunk
            // start would otherwise wrap to a huge sample index.
            const int64_t start_sample =
                static_cast<int64_t>(current_sample_) -
                static_cast<int64_t>(current_chunk_size_) -
                static_cast<int64_t>(speech_pad_left_samples_);
            speech_start_ =
                start_sample > 0 ? static_cast<unsigned int>(start_sample) : 0u;
            float start_sec = 1.0 * speech_start_ / sample_rate_;
            speakStart_.emplace_back(start_sec);
            LOG_DEBUG << "{ speech start: " << start_sec
                      << " s; prob: " << outputProb_ << " }";
            states_.emplace_back(Vad::State::START);
        } else {
            // 1. Silence
            LOG_DEBUG << "{ silence: " << 1.0 * current_sample_ / sample_rate_
                      << " s; prob: " << outputProb_ << " }";
            states_.emplace_back(Vad::State::SIL);
        }
    } else if (outputProb_ >= threshold_ - 0.15) {
        // 3. Continue
        if (temp_end_ != 0) {
            // speech prob relaxation, speech continues again
            LOG_DEBUG << "{ speech fake end(sil < min_silence_ms) to continue: "
                      << 1.0 * current_sample_ / sample_rate_
                      << " s; prob: " << outputProb_ << " }";
            temp_end_ = 0;
        } else {
            // speech prob relaxation, keep tracking speech
            LOG_DEBUG << "{ speech continue: "
                      << 1.0 * current_sample_ / sample_rate_
                      << " s; prob: " << outputProb_ << " }";
        }
        states_.emplace_back(Vad::State::SPEECH);
    } else {
        // 4. End
        if (temp_end_ == 0) {
            // Remember where the candidate silence began.
            temp_end_ = current_sample_;
        }

        // check possible speech end
        if (current_sample_ - temp_end_ < min_silence_samples_) {
            // a. silence < min_silence_samples, continue speaking
            LOG_DEBUG << "{ speech fake end(sil < min_silence_ms): "
                      << 1.0 * current_sample_ / sample_rate_
                      << " s; prob: " << outputProb_ << " }";
            states_.emplace_back(Vad::State::SIL);
        } else {
            // b. silence >= min_silence_samples, end speaking
            speech_end_ = current_sample_ + speech_pad_right_samples_;
            temp_end_ = 0;
            triggerd_ = false;
            auto end_sec = 1.0 * speech_end_ / sample_rate_;
            speakEnd_.emplace_back(end_sec);
            LOG_DEBUG << "{ speech end: " << end_sec
                      << " s; prob: " << outputProb_ << " }";
            states_.emplace_back(Vad::State::END);
        }
    }

    return states_.back();
}
// Returns the speech segments detected so far, one map per segment with the
// keys "start" and "end" (both in seconds).
//
// A segment that has started but not yet ended is reported with the current
// audio length as its end. That fallback is computed locally and is not
// stored in speakEnd_, so calling this mid-stream does not disturb the
// start/end pairing of segments that finish later.
//
// removeThreshold, expandHeadThreshold, expandTailThreshold and
// mergeThreshold are accepted for interface compatibility but are currently
// unused.
// TODO: the post-processing they were meant to drive (dropping segments
// shorter than removeThreshold, expanding segment heads/tails, merging
// segments closer than mergeThreshold) is not implemented.
const std::vector<std::map<std::string, float>> Vad::GetResult(
    float removeThreshold,
    float expandHeadThreshold,
    float expandTailThreshold,
    float mergeThreshold) const {
    (void)removeThreshold;
    (void)expandHeadThreshold;
    (void)expandTailThreshold;
    (void)mergeThreshold;

    if (speakStart_.empty() && speakEnd_.empty()) {
        return {};
    }

    // Length of the audio processed so far, in seconds.
    const float audioLength = 1.0 * current_sample_ / sample_rate_;

    std::vector<std::map<std::string, float>> result;
    result.reserve(speakStart_.size());
    for (std::size_t i = 0; i < speakStart_.size(); ++i) {
        // A start without a recorded end is still open: close it at the
        // current audio length.
        const float end = i < speakEnd_.size() ? speakEnd_[i] : audioLength;
        result.emplace_back(std::map<std::string, float>(
            {{"start", speakStart_[i]}, {"end", end}}));
    }
    return result;
}
// Writes a short bracketed tag for a VAD state to `os` and returns the
// stream; any value outside the four known states is written as "[ILL]".
std::ostream& operator<<(std::ostream& os, const Vad::State& s) {
    // Start from the illegal-state tag and replace it on a match.
    const char* tag = "[ILL]";
    if (s == Vad::State::SIL) {
        tag = "[SIL]";
    } else if (s == Vad::State::START) {
        tag = "[STA]";
    } else if (s == Vad::State::SPEECH) {
        tag = "[SPE]";
    } else if (s == Vad::State::END) {
        tag = "[END]";
    }
    return os << tag;
}

@ -0,0 +1,124 @@
// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <mutex>
#include <vector>
#include "./wav.h"
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/runtime.h"
class Vad : public fastdeploy::FastDeployModel {
public:
enum class State { SIL = 0, START, SPEECH, END };
friend std::ostream& operator<<(std::ostream& os, const Vad::State& s);
Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option =
fastdeploy::RuntimeOption());
void Init();
void Reset();
void SetConfig(int sr,
int frame_ms,
float threshold,
int min_silence_duration_ms,
int speech_pad_left_ms,
int speech_pad_right_ms);
bool ForwardChunk(std::vector<float>& chunk);
const State& Postprocess();
const std::vector<std::map<std::string, float>> GetResult(
float removeThreshold = 0.0,
float expandHeadThreshold = 0.0,
float expandTailThreshold = 0,
float mergeThreshold = 0.0) const;
const std::vector<State> GetStates() const { return states_; }
int SampleRate() const { return sample_rate_; }
int FrameMs() const { return frame_ms_; }
int64_t WindowSizeSamples() const { return window_size_samples_; }
float Threshold() const { return threshold_; }
int MinSilenceDurationMs() const {
return min_silence_samples_ / sample_rate_;
}
int SpeechPadLeftMs() const {
return speech_pad_left_samples_ / sample_rate_;
}
int SpeechPadRightMs() const {
return speech_pad_right_samples_ / sample_rate_;
}
int MinSilenceSamples() const { return min_silence_samples_; }
int SpeechPadLeftSamples() const { return speech_pad_left_samples_; }
int SpeechPadRightSamples() const { return speech_pad_right_samples_; }
std::string ModelName() const override;
private:
bool Initialize();
private:
std::once_flag init_;
// input and output
std::vector<fastdeploy::FDTensor> inputTensors_;
std::vector<fastdeploy::FDTensor> outputTensors_;
// model states
bool triggerd_ = false;
unsigned int speech_start_ = 0;
unsigned int speech_end_ = 0;
unsigned int temp_end_ = 0;
unsigned int current_sample_ = 0;
unsigned int current_chunk_size_ = 0;
// MAX 4294967295 samples / 8sample per ms / 1000 / 60 = 8947 minutes
float outputProb_;
std::vector<float> speakStart_;
mutable std::vector<float> speakEnd_;
std::vector<State> states_;
/* ========================================================================
*/
int sample_rate_ = 16000;
int frame_ms_ = 32; // 32, 64, 96 for 16k
float threshold_ = 0.5f;
int64_t window_size_samples_; // support 256 512 768 for 8k; 512 1024 1536
// for 16k.
int sr_per_ms_; // support 8 or 16
int min_silence_samples_; // sr_per_ms_ * frame_ms_
int speech_pad_left_samples_{0}; // usually 250ms
int speech_pad_right_samples_{0}; // usually 0
/* ========================================================================
*/
std::vector<int64_t> sr_;
const size_t size_hc_ = 2 * 1 * 64; // It's FIXED.
std::vector<float> h_;
std::vector<float> c_;
std::vector<int64_t> input_node_dims_;
const std::vector<int64_t> sr_node_dims_ = {1};
const std::vector<int64_t> hc_node_dims_ = {2, 1, 64};
};

@ -0,0 +1,197 @@
// Copyright (c) 2016 Personal (Binbin Zhang)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
namespace wav {

// In-memory mirror of the canonical 44-byte PCM WAV header. The struct is
// read from and written to disk verbatim, so every field uses a fixed-width
// type and the total size is checked at compile time. Multi-byte fields are
// stored in host byte order (WAV itself is little-endian).
struct WavHeader {
    char riff[4];  // "RIFF"
    uint32_t size;  // sizeof(WavHeader) - 8 + data_size when written here
    char wav[4];  // "WAVE"
    char fmt[4];  // "fmt "
    uint32_t fmt_size;  // 16 for plain PCM; larger values are skipped on read
    uint16_t format;    // 1 is written by WavWriter
    uint16_t channels;
    uint32_t sample_rate;
    uint32_t bytes_per_second;
    uint16_t block_size;  // channels * bytes per sample
    uint16_t bit;         // bits per sample
    char data[4];         // "data" (or the tag of a chunk to be skipped)
    uint32_t data_size;   // payload size in bytes of the chunk named above
};

static_assert(sizeof(WavHeader) == 44,
              "WavHeader must match the 44-byte on-disk WAV header");

// Loads a PCM WAV file into a heap buffer of floats (one float per sample,
// channels interleaved, integer sample values converted without scaling).
//
// The reader owns its buffer: it is released by the destructor and on every
// call to Open(), and copies of a reader get their own buffer.
class WavReader {
  public:
    WavReader() {}

    // Opens `filename` immediately. The result of Open() is not reported;
    // on failure the reader is left empty (data() == nullptr,
    // num_samples() == 0).
    explicit WavReader(const std::string& filename) { Open(filename); }

    WavReader(const WavReader& other)
        : num_channel_(other.num_channel_),
          sample_rate_(other.sample_rate_),
          bits_per_sample_(other.bits_per_sample_),
          num_samples_(other.num_samples_),
          num_data_(other.num_data_),
          data_(CloneSamples(other)) {}

    WavReader& operator=(const WavReader& other) {
        if (this != &other) {
            // Allocate before releasing, so a failed allocation leaves this
            // object untouched.
            float* copy = CloneSamples(other);
            delete[] data_;
            data_ = copy;
            num_channel_ = other.num_channel_;
            sample_rate_ = other.sample_rate_;
            bits_per_sample_ = other.bits_per_sample_;
            num_samples_ = other.num_samples_;
            num_data_ = other.num_data_;
        }
        return *this;
    }

    ~WavReader() { delete[] data_; }

    // Reads `filename`, replacing anything loaded earlier.
    //
    // Returns true on success. Returns false, after printing a diagnostic to
    // stderr and leaving the reader empty, when the file cannot be opened,
    // is not a RIFF/WAVE file, has no "data" chunk, declares zero channels,
    // or uses a sample width other than 8, 16 or 32 bits.
    //
    // A payload shorter than the header announces is tolerated: the samples
    // that could not be read are left at 0.
    bool Open(const std::string& filename) {
        Release();

        FILE* fp = fopen(filename.c_str(), "rb");
        if (NULL == fp) {
            fprintf(stderr, "Error in read %s\n", filename.c_str());
            return false;
        }

        const bool ok = Load(fp);
        fclose(fp);  // single exit point: the handle is closed on every path
        if (!ok) {
            Release();
        }
        return ok;
    }

    int num_channel() const { return num_channel_; }

    int sample_rate() const { return sample_rate_; }

    int bits_per_sample() const { return bits_per_sample_; }

    int num_samples() const { return num_samples_; }

    const float* data() const { return data_; }

  private:
    // Frees the sample buffer and returns every field to its empty value.
    void Release() {
        delete[] data_;
        data_ = nullptr;
        num_channel_ = 0;
        sample_rate_ = 0;
        bits_per_sample_ = 0;
        num_samples_ = 0;
        num_data_ = 0;
    }

    // Returns a freshly allocated copy of `src`'s samples, or nullptr when
    // `src` holds none.
    static float* CloneSamples(const WavReader& src) {
        if (nullptr == src.data_) {
            return nullptr;
        }
        float* copy = new float[src.num_data_];
        memcpy(copy, src.data_, sizeof(float) * src.num_data_);
        return copy;
    }

    // Reads an 8-byte chunk header (4-byte tag + 4-byte size) into the
    // trailing `data` / `data_size` fields of `header`. Returns false on a
    // short read, which is what ends the chunk search at end of file.
    static bool ReadChunkHeader(FILE* fp, WavHeader* header) {
        char raw[8];
        if (fread(raw, 1, sizeof(raw), fp) != sizeof(raw)) {
            return false;
        }
        memcpy(header->data, raw, 4);
        memcpy(&header->data_size, raw + 4, 4);
        return true;
    }

    // Parses the already opened stream into this object. The caller owns
    // `fp` and resets the object when false is returned.
    bool Load(FILE* fp) {
        WavHeader header;
        if (fread(&header, 1, sizeof(header), fp) != sizeof(header)) {
            fprintf(stderr, "WaveData: file is too short for a WAV header.\n");
            return false;
        }

        // check "RIFF" "WAVE" "fmt "; "data" is located below
        if (0 != strncmp(header.riff, "RIFF", 4) ||
            0 != strncmp(header.wav, "WAVE", 4) ||
            0 != strncmp(header.fmt, "fmt ", 4)) {
            fprintf(stderr, "WaveData: missing RIFF/WAVE/fmt tags.\n");
            return false;
        }

        if (header.fmt_size < 16) {
            fprintf(stderr,
                    "WaveData: expect PCM format data "
                    "to have fmt chunk of at least size 16.\n");
            return false;
        } else if (header.fmt_size > 16) {
            // The chunk after "fmt " starts at 12 + 8 + fmt_size, i.e. the
            // canonical position (44 - 8) shifted by the extra fmt bytes.
            const long offset =
                44 - 8 + static_cast<long>(header.fmt_size) - 16;
            if (0 != fseek(fp, offset, SEEK_SET) ||
                !ReadChunkHeader(fp, &header)) {
                fprintf(stderr, "WaveData: truncated after fmt chunk.\n");
                return false;
            }
        }

        // Skip any sub-chunks between "fmt" and "data". Usually there will
        // be a single "fact" sub chunk, but on Windows there can also be a
        // "list" sub chunk. Their contents are ignored.
        while (0 != strncmp(header.data, "data", 4)) {
            if (0 != fseek(fp, static_cast<long>(header.data_size), SEEK_CUR) ||
                !ReadChunkHeader(fp, &header)) {
                // Without this check a file lacking a "data" chunk would
                // spin here forever.
                fprintf(stderr, "WaveData: no data chunk found.\n");
                return false;
            }
        }

        if (0 == header.channels) {
            fprintf(stderr, "WaveData: channel count is zero.\n");
            return false;
        }
        // Validated up front: the width is used as a divisor below.
        if (header.bit != 8 && header.bit != 16 && header.bit != 32) {
            fprintf(stderr, "unsupported quantization bits");
            return false;
        }

        num_channel_ = header.channels;
        sample_rate_ = header.sample_rate;
        bits_per_sample_ = header.bit;

        const uint32_t bytes_per_sample = header.bit / 8u;
        const int num_data =
            static_cast<int>(header.data_size / bytes_per_sample);
        if (num_data < 0) {
            fprintf(stderr, "WaveData: data chunk is too large.\n");
            return false;
        }

        num_data_ = num_data;
        data_ = new float[num_data]();  // zero-filled 1-dim array
        num_samples_ = num_data / num_channel_;

        for (int i = 0; i < num_data; ++i) {
            bool got = false;
            switch (bits_per_sample_) {
                case 8: {
                    // NOTE(review): kept as plain `char`, as in the original;
                    // its signedness is platform-dependent.
                    char sample;
                    got = (fread(&sample, sizeof(sample), 1, fp) == 1);
                    if (got) data_[i] = static_cast<float>(sample);
                    break;
                }
                case 16: {
                    int16_t sample;
                    got = (fread(&sample, sizeof(sample), 1, fp) == 1);
                    if (got) data_[i] = static_cast<float>(sample);
                    break;
                }
                case 32: {
                    int32_t sample;
                    got = (fread(&sample, sizeof(sample), 1, fp) == 1);
                    if (got) data_[i] = static_cast<float>(sample);
                    break;
                }
            }
            if (!got) {
                // Payload ended early; the remaining samples stay 0.
                fprintf(stderr, "WaveData: data chunk is truncated.\n");
                break;
            }
        }
        return true;
    }

    int num_channel_ = 0;
    int sample_rate_ = 0;
    int bits_per_sample_ = 0;
    int num_samples_ = 0;  // sample points per channel
    int num_data_ = 0;     // number of floats held in data_
    float* data_ = nullptr;
};

// Writes interleaved float samples to a PCM WAV file. The writer does not
// own `data`; the caller must keep it alive until Write() returns.
class WavWriter {
  public:
    // data:            num_samples * num_channel interleaved values.
    // num_samples:     sample points per channel.
    // bits_per_sample: 8, 16 or 32; each value is truncated to that integer
    //                  type without scaling.
    WavWriter(const float* data,
              int num_samples,
              int num_channel,
              int sample_rate,
              int bits_per_sample)
        : data_(data),
          num_samples_(num_samples),
          num_channel_(num_channel),
          sample_rate_(sample_rate),
          bits_per_sample_(bits_per_sample) {}

    // Writes the header followed by the samples to `filename`. Prints a
    // diagnostic to stderr and writes nothing when the sample width is not
    // 8, 16 or 32 bits or when the file cannot be created.
    void Write(const std::string& filename) {
        if (bits_per_sample_ != 8 && bits_per_sample_ != 16 &&
            bits_per_sample_ != 32) {
            // Otherwise the header would announce a payload that the switch
            // below never writes.
            fprintf(stderr, "unsupported quantization bits");
            return;
        }

        // "wb": text mode would rewrite 0x0A bytes on Windows.
        FILE* fp = fopen(filename.c_str(), "wb");
        if (NULL == fp) {
            fprintf(stderr, "Error in write %s\n", filename.c_str());
            return;
        }

        const int bytes_per_sample = bits_per_sample_ / 8;

        WavHeader header;
        memset(&header, 0, sizeof(header));
        memcpy(header.riff, "RIFF", 4);
        memcpy(header.wav, "WAVE", 4);
        memcpy(header.fmt, "fmt ", 4);
        memcpy(header.data, "data", 4);
        header.fmt_size = 16;
        header.format = 1;
        header.channels = num_channel_;
        header.bit = bits_per_sample_;
        header.sample_rate = sample_rate_;
        header.data_size = num_samples_ * num_channel_ * bytes_per_sample;
        header.size = sizeof(header) - 8 + header.data_size;
        header.bytes_per_second =
            sample_rate_ * num_channel_ * bytes_per_sample;
        header.block_size = num_channel_ * bytes_per_sample;
        fwrite(&header, 1, sizeof(header), fp);

        // Samples are interleaved, so one flat pass preserves frame order.
        const int total = num_samples_ * num_channel_;
        for (int i = 0; i < total; ++i) {
            switch (bits_per_sample_) {
                case 8: {
                    char sample = static_cast<char>(data_[i]);
                    fwrite(&sample, 1, sizeof(sample), fp);
                    break;
                }
                case 16: {
                    int16_t sample = static_cast<int16_t>(data_[i]);
                    fwrite(&sample, 1, sizeof(sample), fp);
                    break;
                }
                case 32: {
                    int32_t sample = static_cast<int32_t>(data_[i]);
                    fwrite(&sample, 1, sizeof(sample), fp);
                    break;
                }
            }
        }
        fclose(fp);
    }

  private:
    const float* data_;
    int num_samples_;  // sample points per channel
    int num_channel_;
    int sample_rate_;
    int bits_per_sample_;
};

}  // namespace wav
Loading…
Cancel
Save