opt to compile asr,cls,vad; add vad; format code (#2968)

pull/2993/head
Hui Zhang 2 years ago committed by GitHub
parent 78e29c8ec4
commit b35fc01a3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,3 +1,6 @@
engine/common/base/flags.h
engine/common/base/log.h
tools/valgrind*
*log
fc_patch/*

@ -20,8 +20,7 @@ project(paddlespeech VERSION 0.1)
set(CMAKE_VERBOSE_MAKEFILE on)
# set std-14
set(CMAKE_CXX_STANDARD 14)
include(FetchContent)
include(ExternalProject)
@ -31,15 +30,28 @@ set(FETCHCONTENT_QUIET off)
get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_patch})
set(CMAKE_CXX_FLAGS)
set(CMAKE_CXX_FLAGS_DEBUG)
set(CMAKE_CXX_FLAGS_RELEASE)
# set std-14
set(CMAKE_CXX_STANDARD 14)
# compiler option
# Keep the same with openfst, -fPIC or -fpic
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ldl")
SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ggdb")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall")
add_compile_options(-fPIC)
###############################################################################
# Option Configurations
###############################################################################
option(WITH_ASR "build asr" ON)
option(WITH_CLS "build cls" ON)
option(WITH_VAD "build vad" ON)
option(TEST_DEBUG "option for debug" OFF)
option(USE_PROFILING "enable c++ profling" OFF)
option(WITH_TESTING "unit test" ON)
@ -47,31 +59,40 @@ option(WITH_TESTING "unit test" ON)
option(USING_GPU "u2 compute on GPU." OFF)
###############################################################################
# Include third party
# Include Third Party
###############################################################################
include(gflags)
include(glog)
# openfst
include(openfst)
add_dependencies(openfst gflags glog)
# paddle lib
include(paddleinference)
# gtest
if(WITH_TESTING)
include(gtest) # download, build, install gtest
endif()
# fastdeploy
include(fastdeploy)
if(WITH_ASR)
# openfst
include(openfst)
add_dependencies(openfst gflags glog)
endif()
###############################################################################
# Find Package
###############################################################################
# python/pybind11/threads
find_package(Threads REQUIRED)
# https://cmake.org/cmake/help/latest/module/FindPython3.html#module:FindPython3
find_package(Python3 COMPONENTS Interpreter Development)
find_package(pybind11 CONFIG)
if(Python3_FOUND)
if(WITH_ASR)
if(Python3_FOUND)
message(STATUS "Python3_FOUND = ${Python3_FOUND}")
message(STATUS "Python3_EXECUTABLE = ${Python3_EXECUTABLE}")
message(STATUS "Python3_LIBRARIES = ${Python3_LIBRARIES}")
@ -79,70 +100,76 @@ if(Python3_FOUND)
message(STATUS "Python3_LINK_OPTIONS = ${Python3_LINK_OPTIONS}")
set(PYTHON_LIBRARIES ${Python3_LIBRARIES} CACHE STRING "python lib" FORCE)
set(PYTHON_INCLUDE_DIR ${Python3_INCLUDE_DIRS} CACHE STRING "python inc" FORCE)
endif()
endif()
message(STATUS "PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}")
message(STATUS "PYTHON_INCLUDE_DIR = ${PYTHON_INCLUDE_DIR}")
message(STATUS "PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}")
message(STATUS "PYTHON_INCLUDE_DIR = ${PYTHON_INCLUDE_DIR}")
if(pybind11_FOUND)
if(pybind11_FOUND)
message(STATUS "pybind11_INCLUDES = ${pybind11_INCLUDE_DIRS}")
message(STATUS "pybind11_LIBRARIES=${pybind11_LIBRARIES}")
message(STATUS "pybind11_DEFINITIONS=${pybind11_DEFINITIONS}")
endif()
endif()
# paddle libpaddle.so
# paddle include and link option
# -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
execute_process(
# paddle libpaddle.so
# paddle include and link option
# -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
execute_process(
COMMAND python -c "\
import os;\
import paddle;\
include_dir=paddle.sysconfig.get_include();\
paddle_dir=os.path.split(include_dir)[0];\
libs_dir=os.path.join(paddle_dir, 'libs');\
fluid_dir=os.path.join(paddle_dir, 'fluid');\
out=' '.join([\"-L\" + libs_dir, \"-L\" + fluid_dir]);\
out += \" -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so\"; print(out);\
import os;\
import paddle;\
include_dir=paddle.sysconfig.get_include();\
paddle_dir=os.path.split(include_dir)[0];\
libs_dir=os.path.join(paddle_dir, 'libs');\
fluid_dir=os.path.join(paddle_dir, 'fluid');\
out=' '.join([\"-L\" + libs_dir, \"-L\" + fluid_dir]);\
out += \" -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so\"; print(out);\
"
OUTPUT_VARIABLE PADDLE_LINK_FLAGS
RESULT_VARIABLE SUCESS)
message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS})
string(STRIP ${PADDLE_LINK_FLAGS} PADDLE_LINK_FLAGS)
message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS})
string(STRIP ${PADDLE_LINK_FLAGS} PADDLE_LINK_FLAGS)
# paddle compile option
# -I/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/include
execute_process(
# paddle compile option
# -I/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/include
execute_process(
COMMAND python -c "\
import paddle; \
include_dir = paddle.sysconfig.get_include(); \
print(f\"-I{include_dir}\"); \
import paddle; \
include_dir = paddle.sysconfig.get_include(); \
print(f\"-I{include_dir}\"); \
"
OUTPUT_VARIABLE PADDLE_COMPILE_FLAGS)
message(STATUS PADDLE_COMPILE_FLAGS= ${PADDLE_COMPILE_FLAGS})
string(STRIP ${PADDLE_COMPILE_FLAGS} PADDLE_COMPILE_FLAGS)
message(STATUS PADDLE_COMPILE_FLAGS= ${PADDLE_COMPILE_FLAGS})
string(STRIP ${PADDLE_COMPILE_FLAGS} PADDLE_COMPILE_FLAGS)
# for LD_LIBRARY_PATH
# set(PADDLE_LIB_DIRS /workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid:/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/libs/)
execute_process(
# for LD_LIBRARY_PATH
# set(PADDLE_LIB_DIRS /workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid:/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/libs/)
execute_process(
COMMAND python -c "\
import os; \
import paddle; \
include_dir=paddle.sysconfig.get_include(); \
paddle_dir=os.path.split(include_dir)[0]; \
libs_dir=os.path.join(paddle_dir, 'libs'); \
fluid_dir=os.path.join(paddle_dir, 'fluid'); \
out=':'.join([libs_dir, fluid_dir]); print(out); \
import os; \
import paddle; \
include_dir=paddle.sysconfig.get_include(); \
paddle_dir=os.path.split(include_dir)[0]; \
libs_dir=os.path.join(paddle_dir, 'libs'); \
fluid_dir=os.path.join(paddle_dir, 'fluid'); \
out=':'.join([libs_dir, fluid_dir]); print(out); \
"
OUTPUT_VARIABLE PADDLE_LIB_DIRS)
message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
endif()
add_compile_options(-fPIC)
###############################################################################
# Add local library
###############################################################################
set(ENGINE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/engine)
message(STATUS "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}")
message(STATUS "CMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}")
message(STATUS "CMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}")
add_subdirectory(engine)

@ -4,5 +4,5 @@ set -xe
# the build script had verified in the paddlepaddle docker image.
# please follow the instruction below to install PaddlePaddle image.
# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html
cmake -B build
cmake -B build -DWITH_ASR=OFF -DWITH_CLS=OFF
cmake --build build -j

@ -8,11 +8,11 @@ windows_x86")
set(CMAKE_VERBOSE_MAKEFILE ON)
set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy)
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz)
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz)
exec_program("mkdir -p ${FASTDEPLOY_DIR} &&
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz -P ${FASTDEPLOY_DIR} &&
tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz -C ${FASTDEPLOY_DIR} &&
mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2 ${FASTDEPLOY_DIR}/linux-x64")
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz -P ${FASTDEPLOY_DIR} &&
tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz -C ${FASTDEPLOY_DIR} &&
mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4 ${FASTDEPLOY_DIR}/linux-x64")
endif()
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz)
@ -36,4 +36,9 @@ elseif (ARCH STREQUAL "android_armv7")
endif()
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# fix compiler flags conflict, since fastdeploy using c++11 for project
set(CMAKE_CXX_STANDARD 14)
include_directories(${FASTDEPLOY_INCS})
message(STATUS "FASTDEPLOY_INCS=${FASTDEPLOY_INCS}")

@ -6,8 +6,19 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kaldi)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common)
add_subdirectory(asr)
add_subdirectory(common)
add_subdirectory(kaldi)
add_subdirectory(common)
if(WITH_ASR)
add_subdirectory(asr)
endif()
if(WITH_CLS)
add_subdirectory(cls)
endif()
if(WITH_VAD)
add_subdirectory(vad)
endif()
add_subdirectory(codelab)
add_subdirectory(cls)

@ -38,7 +38,8 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
decoder_ = std::make_unique<CTCPrefixBeamSearch>(
resource.vocab_path, resource.decoder_opts.ctc_prefix_search_opts);
} else {
decoder_ = std::make_unique<TLGDecoder>(resource.decoder_opts.tlg_decoder_opts);
decoder_ = std::make_unique<TLGDecoder>(
resource.decoder_opts.tlg_decoder_opts);
}
symbol_table_ = decoder_->WordSymbolTable();

@ -3,7 +3,7 @@ ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/../
)
add_subdirectory(utils)
add_subdirectory(base)
add_subdirectory(matrix)
include_directories(

@ -0,0 +1,20 @@
if(WITH_ASR)
add_compile_options(-DWITH_ASR)
set(PPS_FLAGS_LIB "fst/flags.h")
set(PPS_GLOB_LIB "fst/log.h")
else()
set(PPS_FLAGS_LIB "gflags/gflags.h")
set(PPS_GLOB_LIB "glog/logging.h")
endif()
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/flags.h.in
${CMAKE_CURRENT_SOURCE_DIR}/flags.h @ONLY
)
message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/flags.h")
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/log.h.in
${CMAKE_CURRENT_SOURCE_DIR}/log.h @ONLY
)
message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/log.h")

@ -14,4 +14,4 @@
#pragma once
#include "fst/flags.h"
#include "@PPS_FLAGS_LIB@"

@ -14,4 +14,4 @@
#pragma once
#include "fst/log.h"
#include "@PPS_GLOB_LIB@"

@ -33,7 +33,7 @@ CMVN::CMVN(std::string cmvn_file, unique_ptr<FrontendInterface> base_extractor)
dim_ = mean_stats_.size() - 1;
}
void CMVN::ReadCMVNFromJson(string cmvn_file) {
void CMVN::ReadCMVNFromJson(std::string cmvn_file) {
std::string json_str = ppspeech::ReadFile2String(cmvn_file);
picojson::value value;
std::string err;

@ -21,6 +21,7 @@
#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
#include <limits>
#include <map>
#include "frontend/feature-window.h"

@ -7,6 +7,7 @@
#include "frontend/feature-window.h"
#include <cmath>
#include <limits>
#include <vector>
#ifndef M_2PI

@ -17,12 +17,12 @@
*/
#include "frontend/rfft.h"
#include "base/log.h"
#include <cmath>
#include <memory>
#include <vector>
#include "base/log.h"
// see fftsg.c
#ifdef __cplusplus
extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);

@ -25,40 +25,41 @@
namespace kaldi {
/// Empty constructor
template<typename Real>
Matrix<Real>::Matrix(): MatrixBase<Real>(NULL, 0, 0, 0) { }
template <typename Real>
Matrix<Real>::Matrix() : MatrixBase<Real>(NULL, 0, 0, 0) {}
/*
template<>
template<>
void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra, const VectorBase<float> &rb);
void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float>
&ra, const VectorBase<float> &rb);
template<>
template<>
void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double> &ra, const VectorBase<double> &rb);
void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double>
&ra, const VectorBase<double> &rb);
*/
template<typename Real>
inline std::ostream & operator << (std::ostream & os, const MatrixBase<Real> & M) {
template <typename Real>
inline std::ostream& operator<<(std::ostream& os, const MatrixBase<Real>& M) {
M.Write(os, false);
return os;
}
template<typename Real>
inline std::istream & operator >> (std::istream & is, Matrix<Real> & M) {
template <typename Real>
inline std::istream& operator>>(std::istream& is, Matrix<Real>& M) {
M.Read(is, false);
return is;
}
template<typename Real>
inline std::istream & operator >> (std::istream & is, MatrixBase<Real> & M) {
template <typename Real>
inline std::istream& operator>>(std::istream& is, MatrixBase<Real>& M) {
M.Read(is, false);
return is;
}
}// namespace kaldi
} // namespace kaldi
#endif // KALDI_MATRIX_KALDI_MATRIX_INL_H_

File diff suppressed because it is too large Load Diff

@ -38,7 +38,7 @@ namespace kaldi {
/// Base class which provides matrix operations not involving resizing
/// or allocation. Classes Matrix and SubMatrix inherit from it and take care
/// of allocation and resizing.
template<typename Real>
template <typename Real>
class MatrixBase {
public:
// so this child can access protected members of other instances.
@ -62,22 +62,20 @@ class MatrixBase {
}
/// Gives pointer to raw data (const).
inline const Real* Data() const {
return data_;
}
inline const Real *Data() const { return data_; }
/// Gives pointer to raw data (non-const).
inline Real* Data() { return data_; }
inline Real *Data() { return data_; }
/// Returns pointer to data for one row (non-const)
inline Real* RowData(MatrixIndexT i) {
inline Real *RowData(MatrixIndexT i) {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(num_rows_));
return data_ + i * stride_;
}
/// Returns pointer to data for one row (const)
inline const Real* RowData(MatrixIndexT i) const {
inline const Real *RowData(MatrixIndexT i) const {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(num_rows_));
return data_ + i * stride_;
@ -85,8 +83,9 @@ class MatrixBase {
/// Indexing operator, non-const
/// (only checks sizes if compiled with -DKALDI_PARANOID)
inline Real& operator() (MatrixIndexT r, MatrixIndexT c) {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
inline Real &operator()(MatrixIndexT r, MatrixIndexT c) {
KALDI_PARANOID_ASSERT(
static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_cols_));
@ -94,12 +93,13 @@ class MatrixBase {
}
/// Indexing operator, provided for ease of debugging (gdb doesn't work
/// with parenthesis operator).
Real &Index (MatrixIndexT r, MatrixIndexT c) { return (*this)(r, c); }
Real &Index(MatrixIndexT r, MatrixIndexT c) { return (*this)(r, c); }
/// Indexing operator, const
/// (only checks sizes if compiled with -DKALDI_PARANOID)
inline const Real operator() (MatrixIndexT r, MatrixIndexT c) const {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
inline const Real operator()(MatrixIndexT r, MatrixIndexT c) const {
KALDI_PARANOID_ASSERT(
static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_cols_));
@ -115,22 +115,22 @@ class MatrixBase {
/// Sets to zero, except ones along diagonal [for non-square matrices too]
/// Copy given matrix. (no resize is done).
template<typename OtherReal>
void CopyFromMat(const MatrixBase<OtherReal> & M,
template <typename OtherReal>
void CopyFromMat(const MatrixBase<OtherReal> &M,
MatrixTransposeType trans = kNoTrans);
/// Copy from compressed matrix.
//void CopyFromMat(const CompressedMatrix &M);
// void CopyFromMat(const CompressedMatrix &M);
/// Copy given tpmatrix. (no resize is done).
//template<typename OtherReal>
//void CopyFromTp(const TpMatrix<OtherReal> &M,
//MatrixTransposeType trans = kNoTrans);
// template<typename OtherReal>
// void CopyFromTp(const TpMatrix<OtherReal> &M,
// MatrixTransposeType trans = kNoTrans);
/// Copy from CUDA matrix. Implemented in ../cudamatrix/cu-matrix.h
//template<typename OtherReal>
//void CopyFromMat(const CuMatrixBase<OtherReal> &M,
//MatrixTransposeType trans = kNoTrans);
// template<typename OtherReal>
// void CopyFromMat(const CuMatrixBase<OtherReal> &M,
// MatrixTransposeType trans = kNoTrans);
/// This function has two modes of operation. If v.Dim() == NumRows() *
/// NumCols(), then treats the vector as a row-by-row concatenation of a
@ -138,10 +138,11 @@ class MatrixBase {
/// if v.Dim() == NumCols(), it sets each row of *this to a copy of v.
void CopyRowsFromVec(const VectorBase<Real> &v);
/// This version of CopyRowsFromVec is implemented in ../cudamatrix/cu-vector.cc
//void CopyRowsFromVec(const CuVectorBase<Real> &v);
/// This version of CopyRowsFromVec is implemented in
/// ../cudamatrix/cu-vector.cc
// void CopyRowsFromVec(const CuVectorBase<Real> &v);
template<typename OtherReal>
template <typename OtherReal>
void CopyRowsFromVec(const VectorBase<OtherReal> &v);
/// Copies vector into matrix, column-by-column.
@ -177,8 +178,8 @@ class MatrixBase {
const MatrixIndexT num_rows,
const MatrixIndexT col_offset,
const MatrixIndexT num_cols) const {
return SubMatrix<Real>(*this, row_offset, num_rows,
col_offset, num_cols);
return SubMatrix<Real>(
*this, row_offset, num_rows, col_offset, num_cols);
}
inline SubMatrix<Real> RowRange(const MatrixIndexT row_offset,
const MatrixIndexT num_rows) const {
@ -189,7 +190,7 @@ class MatrixBase {
return SubMatrix<Real>(*this, 0, num_rows_, col_offset, num_cols);
}
/*
/*
/// Returns sum of all elements in matrix.
Real Sum() const;
/// Returns trace of matrix.
@ -223,7 +224,8 @@ class MatrixBase {
/// each row by a scalar taken from that dimension of the vector.
void MulRowsVec(const VectorBase<Real> &scale);
/// Divide each row into src.NumCols() equal groups, and then scale i'th row's
/// Divide each row into src.NumCols() equal groups, and then scale i'th
row's
/// j'th group of elements by src(i, j). Requires src.NumRows() ==
/// this->NumRows() and this->NumCols() % src.NumCols() == 0.
void MulRowsGroupMat(const MatrixBase<Real> &src);
@ -242,77 +244,79 @@ class MatrixBase {
/// Does inversion in double precision even if matrix was not double.
void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL,
bool inverse_needed = true);
*/
*/
/// Inverts all the elements of the matrix
void InvertElements();
/*
/*
/// Transpose the matrix. This one is only
/// applicable to square matrices (the one in the
/// Matrix child class works also for non-square.
void Transpose();
*/
*/
/// Copies column r from column indices[r] of src.
/// As a special case, if indexes[i] == -1, sets column i to zero.
/// all elements of "indices" must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this.NumRows()
void CopyCols(const MatrixBase<Real> &src,
const MatrixIndexT *indices);
void CopyCols(const MatrixBase<Real> &src, const MatrixIndexT *indices);
/// Copies row r from row indices[r] of src (does nothing
/// As a special case, if indexes[i] == -1, sets row i to zero.
/// all elements of "indices" must be in [-1, src.NumRows()-1],
/// and src.NumCols() must equal this.NumCols()
void CopyRows(const MatrixBase<Real> &src,
const MatrixIndexT *indices);
void CopyRows(const MatrixBase<Real> &src, const MatrixIndexT *indices);
/// Add column indices[r] of src to column r.
/// As a special case, if indexes[i] == -1, skip column i
/// indices.size() must equal this->NumCols(),
/// all elements of "reorder" must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this.NumRows()
//void AddCols(const MatrixBase<Real> &src,
// void AddCols(const MatrixBase<Real> &src,
// const MatrixIndexT *indices);
/// Copies row r of this matrix from an array of floats at the location given
/// Copies row r of this matrix from an array of floats at the location
/// given
/// by src[r]. If any src[r] is NULL then this.Row(r) will be set to zero.
/// Note: we are using "pointer to const pointer to const object" for "src",
/// because we may create "src" by calling Data() of const CuArray
void CopyRows(const Real *const *src);
/// Copies row r of this matrix to the array of floats at the location given
/// by dst[r]. If dst[r] is NULL, does not copy anywhere. Requires that none
/// by dst[r]. If dst[r] is NULL, does not copy anywhere. Requires that
/// none
/// of the memory regions pointed to by the pointers in "dst" overlap (e.g.
/// none of the pointers should be the same).
void CopyToRows(Real *const *dst) const;
/// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]).
/// If indexes[r] < 0, does not add anything. all elements of "indexes" must
/// be in [-1, src.NumRows()-1], and src.NumCols() must equal this.NumCols().
/// be in [-1, src.NumRows()-1], and src.NumCols() must equal
/// this.NumCols().
// void AddRows(Real alpha,
// const MatrixBase<Real> &src,
// const MatrixIndexT *indexes);
/// Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as the
/// Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as
/// the
/// beginning of a region of memory representing a vector of floats, of the
/// same length as this.NumCols(). If src[r] is NULL, does not add anything.
//void AddRows(Real alpha, const Real *const *src);
// void AddRows(Real alpha, const Real *const *src);
/// For each row r of this matrix, adds it (times alpha) to the array of
/// floats at the location given by dst[r]. If dst[r] is NULL, does not do
/// anything for that row. Requires that none of the memory regions pointed
/// to by the pointers in "dst" overlap (e.g. none of the pointers should be
/// the same).
//void AddToRows(Real alpha, Real *const *dst) const;
// void AddToRows(Real alpha, Real *const *dst) const;
/// For each row i of *this, adds this->Row(i) to
/// dst->Row(indexes(i)) if indexes(i) >= 0, else do nothing.
/// Requires that all the indexes[i] that are >= 0
/// be distinct, otherwise the behavior is undefined.
//void AddToRows(Real alpha,
// void AddToRows(Real alpha,
// const MatrixIndexT *indexes,
// MatrixBase<Real> *dst) const;
/*
/*
inline void ApplyPow(Real power) {
this -> Pow(*this, power);
}
@ -349,66 +353,82 @@ class MatrixBase {
inline void ApplyLog() {
this -> Log(*this);
}
*/
/// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D
/// P^{-1}. Be careful: the relationship of D to the eigenvalues we output is
/// slightly complicated, due to the need for P to be real. In the symmetric
*/
/// Eigenvalue Decomposition of a square NxN matrix into the form (*this) =
/// P D
/// P^{-1}. Be careful: the relationship of D to the eigenvalues we output
/// is
/// slightly complicated, due to the need for P to be real. In the
/// symmetric
/// case D is diagonal and real, but in
/// the non-symmetric case there may be complex-conjugate pairs of eigenvalues.
/// In this case, for the equation (*this) = P D P^{-1} to hold, D must actually
/// be block diagonal, with 2x2 blocks corresponding to any such pairs. If a
/// the non-symmetric case there may be complex-conjugate pairs of
/// eigenvalues.
/// In this case, for the equation (*this) = P D P^{-1} to hold, D must
/// actually
/// be block diagonal, with 2x2 blocks corresponding to any such pairs. If
/// a
/// pair is lambda +- i*mu, D will have a corresponding 2x2 block
/// [lambda, mu; -mu, lambda].
/// Note that if the input matrix (*this) is non-invertible, P may not be invertible
/// so in this case instead of the equation (*this) = P D P^{-1} holding, we have
/// Note that if the input matrix (*this) is non-invertible, P may not be
/// invertible
/// so in this case instead of the equation (*this) = P D P^{-1} holding, we
/// have
/// instead (*this) P = P D.
///
/// The non-member function CreateEigenvalueMatrix creates D from eigs_real and eigs_imag.
//void Eig(MatrixBase<Real> *P,
/// The non-member function CreateEigenvalueMatrix creates D from eigs_real
/// and eigs_imag.
// void Eig(MatrixBase<Real> *P,
// VectorBase<Real> *eigs_real,
// VectorBase<Real> *eigs_imag) const;
/// The Power method attempts to take the matrix to a power using a method that
/// works in general for fractional and negative powers. The input matrix must
/// The Power method attempts to take the matrix to a power using a method
/// that
/// works in general for fractional and negative powers. The input matrix
/// must
/// be invertible and have reasonable condition (or we don't guarantee the
/// results. The method is based on the eigenvalue decomposition. It will
/// return false and leave the matrix unchanged, if at entry the matrix had
/// real negative eigenvalues (or if it had zero eigenvalues and the power was
/// real negative eigenvalues (or if it had zero eigenvalues and the power
/// was
/// negative).
// bool Power(Real pow);
// bool Power(Real pow);
/** Singular value decomposition
Major limitations:
For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we return
For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we
return
the "skinny" Svd, i.e. the matrix in the middle is diagonal, and the
one on the left is rectangular.
In Svd, *this = U*diag(S)*Vt.
Null pointers for U and/or Vt at input mean we do not want that output. We
Null pointers for U and/or Vt at input mean we do not want that output.
We
expect that S.Dim() == m, U is either NULL or m by n,
and v is either NULL or n by n.
The singular values are not sorted (use SortSvd for that). */
//void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
// void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
// MatrixBase<Real> *Vt); // Destroys calling matrix.
/// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is already
/// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is
/// already
/// transposed; the normal formulation is U diag(s) V^T.
/// Null pointers for U or V mean we don't want that output (this saves
/// compute). The singular values are not sorted (use SortSvd for that).
//void Svd(VectorBase<Real> *s, MatrixBase<Real> *U,
// void Svd(VectorBase<Real> *s, MatrixBase<Real> *U,
// MatrixBase<Real> *Vt) const;
/// Compute SVD but only retain the singular values.
//void Svd(VectorBase<Real> *s) const { Svd(s, NULL, NULL); }
// void Svd(VectorBase<Real> *s) const { Svd(s, NULL, NULL); }
/// Returns smallest singular value.
//Real MinSingularValue() const {
// Real MinSingularValue() const {
// Vector<Real> tmp(std::min(NumRows(), NumCols()));
//Svd(&tmp);
//return tmp.Min();
// Svd(&tmp);
// return tmp.Min();
//}
//void TestUninitialized() const; // This function is designed so that if any element
// void TestUninitialized() const; // This function is designed so that if
// any element
// if the matrix is uninitialized memory, valgrind will complain.
/// Returns condition number by computing Svd. Works even if cols > rows.
@ -422,16 +442,19 @@ class MatrixBase {
/// Returns true if matrix is Diagonal.
bool IsDiagonal(Real cutoff = 1.0e-05) const; // replace magic number
/// Returns true if the matrix is all zeros, except for ones on diagonal. (it
/// Returns true if the matrix is all zeros, except for ones on diagonal.
(it
/// does not have to be square). More specifically, this function returns
/// false if for any i, j, (*this)(i, j) differs by more than cutoff from the
/// false if for any i, j, (*this)(i, j) differs by more than cutoff from
the
/// expression (i == j ? 1 : 0).
bool IsUnit(Real cutoff = 1.0e-05) const; // replace magic number
/// Returns true if matrix is all zeros.
bool IsZero(Real cutoff = 1.0e-05) const; // replace magic number
/// Frobenius norm, which is the sqrt of sum of square elements. Same as Schatten 2-norm,
/// Frobenius norm, which is the sqrt of sum of square elements. Same as
Schatten 2-norm,
/// or just "2-norm".
Real FrobeniusNorm() const;
@ -461,7 +484,8 @@ class MatrixBase {
/// Sets each element to the Heaviside step function (x > 0 ? 1 : 0) of the
/// corresponding element in "src". Note: in general you can make different
/// choices for x = 0, but for now please leave it as it (i.e. returning zero)
/// choices for x = 0, but for now please leave it as it (i.e. returning
zero)
/// because it affects the RectifiedLinearComponent in the neural net code.
void Heaviside(const MatrixBase<Real> &src);
@ -477,7 +501,8 @@ class MatrixBase {
/// If the power is negative and the input to the power is zero,
/// The output will be set zero. If include_sign is true, it will
/// multiply the result by the sign of the input.
void PowAbs(const MatrixBase<Real> &src, Real power, bool include_sign=false);
void PowAbs(const MatrixBase<Real> &src, Real power, bool
include_sign=false);
void Floor(const MatrixBase<Real> &src, Real floor_val);
@ -492,36 +517,52 @@ class MatrixBase {
/// Floor(src, lower_limit);
/// Ceiling(src, upper_limit);
/// Exp(src)
void ExpLimited(const MatrixBase<Real> &src, Real lower_limit, Real upper_limit);
void ExpLimited(const MatrixBase<Real> &src, Real lower_limit, Real
upper_limit);
/// Set each element to y = log(1 + exp(x))
void SoftHinge(const MatrixBase<Real> &src);
/// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 / p).
/// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0.
/// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 /
p).
/// Requires src.NumRows() == this->NumRows() and src.NumCols() %
this->NumCols() == 0.
void GroupPnorm(const MatrixBase<Real> &src, Real power);
/// Calculate derivatives for the GroupPnorm function above...
/// if "input" is the input to the GroupPnorm function above (i.e. the "src" variable),
/// and "output" is the result of the computation (i.e. the "this" of that function
/// call), and *this has the same dimension as "input", then it sets each element
/// of *this to the derivative d(output-elem)/d(input-elem) for each element of "input", where
/// "output-elem" is whichever element of output depends on that input element.
void GroupPnormDeriv(const MatrixBase<Real> &input, const MatrixBase<Real> &output,
/// if "input" is the input to the GroupPnorm function above (i.e. the "src"
variable),
/// and "output" is the result of the computation (i.e. the "this" of that
function
/// call), and *this has the same dimension as "input", then it sets each
element
/// of *this to the derivative d(output-elem)/d(input-elem) for each element
of "input", where
/// "output-elem" is whichever element of output depends on that input
element.
void GroupPnormDeriv(const MatrixBase<Real> &input, const MatrixBase<Real>
&output,
Real power);
/// Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j
/// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0.
/// Requires src.NumRows() == this->NumRows() and src.NumCols() %
this->NumCols() == 0.
void GroupMax(const MatrixBase<Real> &src);
/// Calculate derivatives for the GroupMax function above, where
/// "input" is the input to the GroupMax function above (i.e. the "src" variable),
/// and "output" is the result of the computation (i.e. the "this" of that function
/// "input" is the input to the GroupMax function above (i.e. the "src"
variable),
/// and "output" is the result of the computation (i.e. the "this" of that
function
/// call), and *this must have the same dimension as "input". Each element
/// of *this will be set to 1 if the corresponding input equals the output of
/// the group, and 0 otherwise. The equals the function derivative where it is
/// defined (it's not defined where multiple inputs in the group are equal to the output).
void GroupMaxDeriv(const MatrixBase<Real> &input, const MatrixBase<Real> &output);
/// of *this will be set to 1 if the corresponding input equals the output
of
/// the group, and 0 otherwise. The equals the function derivative where it
is
/// defined (it's not defined where multiple inputs in the group are equal
to the output).
void GroupMaxDeriv(const MatrixBase<Real> &input, const MatrixBase<Real>
&output);
/// Set each element to the tanh of the corresponding element of "src".
void Tanh(const MatrixBase<Real> &src);
@ -535,55 +576,56 @@ class MatrixBase {
// element-by-element, set *this = diff * (1.0 - value^2).
void DiffTanh(const MatrixBase<Real> &value,
const MatrixBase<Real> &diff);
*/
*/
/** Uses Svd to compute the eigenvalue decomposition of a symmetric positive
* semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an
* orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not
* positive semi-definite (check_thresh controls how stringent the check is;
* set it to 2 to ensure it won't ever complain, but it will zero out negative
* set it to 2 to ensure it won't ever complain, but it will zero out
* negative
* dimensions in your matrix.
*
* Caution: if you want the eigenvalues, it may make more sense to convert to
* SpMatrix and use Eig() function there, which uses eigenvalue decomposition
* Caution: if you want the eigenvalues, it may make more sense to convert
* to
* SpMatrix and use Eig() function there, which uses eigenvalue
* decomposition
* directly rather than SVD.
*/
/// stream read.
/// Use instead of stream<<*this, if you want to add to existing contents.
// Will throw exception on failure.
void Read(std::istream & in, bool binary);
void Read(std::istream &in, bool binary);
/// write to stream.
void Write(std::ostream & out, bool binary) const;
void Write(std::ostream &out, bool binary) const;
// Below is internal methods for Svd, user does not have to know about this.
protected:
/// Initializer, callable only from child.
explicit MatrixBase(Real *data, MatrixIndexT cols, MatrixIndexT rows, MatrixIndexT stride) :
data_(data), num_cols_(cols), num_rows_(rows), stride_(stride) {
explicit MatrixBase(Real *data,
MatrixIndexT cols,
MatrixIndexT rows,
MatrixIndexT stride)
: data_(data), num_cols_(cols), num_rows_(rows), stride_(stride) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
/// Initializer, callable only from child.
/// Empty initializer, for un-initialized matrix.
explicit MatrixBase(): data_(NULL) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
explicit MatrixBase() : data_(NULL) { KALDI_ASSERT_IS_FLOATING_TYPE(Real); }
// Make sure pointers to MatrixBase cannot be deleted.
~MatrixBase() { }
~MatrixBase() {}
/// A workaround that allows SubMatrix to get a pointer to non-const data
/// for const Matrix. Unfortunately C++ does not allow us to declare a
/// "public const" inheritance or anything like that, so it would require
/// a lot of work to make the SubMatrix class totally const-correct--
/// we would have to override many of the Matrix functions.
inline Real* Data_workaround() const {
return data_;
}
inline Real *Data_workaround() const { return data_; }
/// data memory area
Real* data_;
Real *data_;
/// these attributes store the real matrix size as it is stored in memory
/// including memalignment
@ -592,63 +634,66 @@ class MatrixBase {
/** True number of columns for the internal matrix. This number may differ
* from num_cols_ as memory alignment might be used. */
MatrixIndexT stride_;
private:
KALDI_DISALLOW_COPY_AND_ASSIGN(MatrixBase);
};
/// A class for storing matrices.
template<typename Real>
template <typename Real>
class Matrix : public MatrixBase<Real> {
public:
/// Empty constructor.
Matrix();
/// Basic constructor.
Matrix(const MatrixIndexT r, const MatrixIndexT c,
Matrix(const MatrixIndexT r,
const MatrixIndexT c,
MatrixResizeType resize_type = kSetZero,
MatrixStrideType stride_type = kDefaultStride):
MatrixBase<Real>() { Resize(r, c, resize_type, stride_type); }
MatrixStrideType stride_type = kDefaultStride)
: MatrixBase<Real>() {
Resize(r, c, resize_type, stride_type);
}
/// Swaps the contents of *this and *other. Shallow swap.
void Swap(Matrix<Real> *other);
/// Constructor from any MatrixBase. Can also copy with transpose.
/// Allocates new memory.
explicit Matrix(const MatrixBase<Real> & M,
explicit Matrix(const MatrixBase<Real> &M,
MatrixTransposeType trans = kNoTrans);
/// Same as above, but need to avoid default copy constructor.
Matrix(const Matrix<Real> & M); // (cannot make explicit)
Matrix(const Matrix<Real> &M); // (cannot make explicit)
/// Copy constructor: as above, but from another type.
template<typename OtherReal>
explicit Matrix(const MatrixBase<OtherReal> & M,
template <typename OtherReal>
explicit Matrix(const MatrixBase<OtherReal> &M,
MatrixTransposeType trans = kNoTrans);
/// Copy constructor taking TpMatrix...
//template <typename OtherReal>
//explicit Matrix(const TpMatrix<OtherReal> & M,
//MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
//if (trans == kNoTrans) {
//Resize(M.NumRows(), M.NumCols(), kUndefined);
//this->CopyFromTp(M);
// template <typename OtherReal>
// explicit Matrix(const TpMatrix<OtherReal> & M,
// MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
// if (trans == kNoTrans) {
// Resize(M.NumRows(), M.NumCols(), kUndefined);
// this->CopyFromTp(M);
//} else {
//Resize(M.NumCols(), M.NumRows(), kUndefined);
//this->CopyFromTp(M, kTrans);
// Resize(M.NumCols(), M.NumRows(), kUndefined);
// this->CopyFromTp(M, kTrans);
//}
//}
/// read from stream.
// Unlike one in base, allows resizing.
void Read(std::istream & in, bool binary);
void Read(std::istream &in, bool binary);
/// Remove a specified row.
void RemoveRow(MatrixIndexT i);
/// Transpose the matrix. Works for non-square
/// matrices as well as square ones.
//void Transpose();
// void Transpose();
/// Distructor to free matrices.
~Matrix() { Destroy(); }
@ -671,7 +716,7 @@ class Matrix : public MatrixBase<Real> {
MatrixStrideType stride_type = kDefaultStride);
/// Assignment operator that takes MatrixBase.
Matrix<Real> &operator = (const MatrixBase<Real> &other) {
Matrix<Real> &operator=(const MatrixBase<Real> &other) {
if (MatrixBase<Real>::NumRows() != other.NumRows() ||
MatrixBase<Real>::NumCols() != other.NumCols())
Resize(other.NumRows(), other.NumCols(), kUndefined);
@ -680,7 +725,7 @@ class Matrix : public MatrixBase<Real> {
}
/// Assignment operator. Needed for inclusion in std::vector.
Matrix<Real> &operator = (const Matrix<Real> &other) {
Matrix<Real> &operator=(const Matrix<Real> &other) {
if (MatrixBase<Real>::NumRows() != other.NumRows() ||
MatrixBase<Real>::NumCols() != other.NumCols())
Resize(other.NumRows(), other.NumCols(), kUndefined);
@ -694,13 +739,14 @@ class Matrix : public MatrixBase<Real> {
void Destroy();
/// Init assumes the current class contents are invalid (i.e. junk or have
/// already been freed), and it sets the matrix to newly allocated memory with
/// the specified number of rows and columns. r == c == 0 is acceptable. The data
/// already been freed), and it sets the matrix to newly allocated memory
/// with
/// the specified number of rows and columns. r == c == 0 is acceptable.
/// The data
/// memory contents will be undefined.
void Init(const MatrixIndexT r,
const MatrixIndexT c,
const MatrixStrideType stride_type);
};
/// @} end "addtogroup matrix_group"
@ -710,7 +756,7 @@ class Matrix : public MatrixBase<Real> {
/// A structure containing the HTK header.
/// [TODO: change the style of the variables to Kaldi-compliant]
template<typename Real>
template <typename Real>
class SubMatrix : public MatrixBase<Real> {
public:
// Initialize a SubMatrix from part of a matrix; this is
@ -718,7 +764,7 @@ class SubMatrix : public MatrixBase<Real> {
// This initializer is against the proper semantics of "const", since
// SubMatrix can change its contents. It would be hard to implement
// a "const-safe" version of this class.
SubMatrix(const MatrixBase<Real>& T,
SubMatrix(const MatrixBase<Real> &T,
const MatrixIndexT ro, // row offset, 0 < ro < NumRows()
const MatrixIndexT r, // number of rows, r > 0
const MatrixIndexT co, // column offset, 0 < co < NumCols()
@ -735,13 +781,13 @@ class SubMatrix : public MatrixBase<Real> {
/// This type of constructor is needed for Range() to work [in Matrix base
/// class]. Cannot make it explicit.
SubMatrix<Real> (const SubMatrix &other):
MatrixBase<Real> (other.data_, other.num_cols_, other.num_rows_,
other.stride_) {}
SubMatrix<Real>(const SubMatrix &other)
: MatrixBase<Real>(
other.data_, other.num_cols_, other.num_rows_, other.stride_) {}
private:
/// Disallow assignment.
SubMatrix<Real> &operator = (const SubMatrix<Real> &other);
SubMatrix<Real> &operator=(const SubMatrix<Real> &other);
};
/// @} End of "addtogroup matrix_funcs_io".
@ -794,25 +840,33 @@ Real TraceMatMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
/// the same as U->NumCols(), and we sort s from greatest to least absolute
/// value (if sort_on_absolute_value == true) or greatest to least value
/// otherwise, moving the columns of U, if it exists, and the rows of Vt, if it
/// exists, around in the same way. Note: the "absolute value" part won't matter
/// exists, around in the same way. Note: the "absolute value" part won't
matter
/// if this is an actual SVD, since singular values are non-negative.
template<typename Real> void SortSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real>* Vt = NULL,
bool sort_on_absolute_value = true);
/// Creates the eigenvalue matrix D that is part of the decomposition used Matrix::Eig.
/// Creates the eigenvalue matrix D that is part of the decomposition used
Matrix::Eig.
/// D will be block-diagonal with blocks of size 1 (for real eigenvalues) or 2x2
/// for complex pairs. If a complex pair is lambda +- i*mu, D will have a corresponding
/// for complex pairs. If a complex pair is lambda +- i*mu, D will have a
corresponding
/// 2x2 block [lambda, mu; -mu, lambda].
/// This function will throw if any complex eigenvalues are not in complex conjugate
/// This function will throw if any complex eigenvalues are not in complex
conjugate
/// pairs (or the members of such pairs are not consecutively numbered).
template<typename Real>
void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real> &imag,
void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real>
&imag,
MatrixBase<Real> *D);
/// The following function is used in Matrix::Power, and separately tested, so we
/// declare it here mainly for the testing code to see. It takes a complex value to
/// a power using a method that will work for noninteger powers (but will fail if the
/// The following function is used in Matrix::Power, and separately tested, so
we
/// declare it here mainly for the testing code to see. It takes a complex
value to
/// a power using a method that will work for noninteger powers (but will fail
if the
/// complex value is real and negative).
template<typename Real>
bool AttemptComplexPower(Real *x_re, Real *x_im, Real power);
@ -823,17 +877,17 @@ bool AttemptComplexPower(Real *x_re, Real *x_im, Real power);
/// \addtogroup matrix_funcs_io
/// @{
template<typename Real>
std::ostream & operator << (std::ostream & Out, const MatrixBase<Real> & M);
template <typename Real>
std::ostream &operator<<(std::ostream &Out, const MatrixBase<Real> &M);
template<typename Real>
std::istream & operator >> (std::istream & In, MatrixBase<Real> & M);
template <typename Real>
std::istream &operator>>(std::istream &In, MatrixBase<Real> &M);
// The Matrix read allows resizing, so we override the MatrixBase one.
template<typename Real>
std::istream & operator >> (std::istream & In, Matrix<Real> & M);
template <typename Real>
std::istream &operator>>(std::istream &In, Matrix<Real> &M);
template<typename Real>
template <typename Real>
bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) {
return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols());
}
@ -844,7 +898,6 @@ bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) {
} // namespace kaldi
// we need to include the implementation and some
// template specializations.
#include "matrix/kaldi-matrix-inl.h"

@ -26,32 +26,33 @@
namespace kaldi {
template<typename Real>
std::ostream & operator << (std::ostream &os, const VectorBase<Real> &rv) {
template <typename Real>
std::ostream &operator<<(std::ostream &os, const VectorBase<Real> &rv) {
rv.Write(os, false);
return os;
}
template<typename Real>
std::istream &operator >> (std::istream &is, VectorBase<Real> &rv) {
template <typename Real>
std::istream &operator>>(std::istream &is, VectorBase<Real> &rv) {
rv.Read(is, false);
return is;
}
template<typename Real>
std::istream &operator >> (std::istream &is, Vector<Real> &rv) {
template <typename Real>
std::istream &operator>>(std::istream &is, Vector<Real> &rv) {
rv.Read(is, false);
return is;
}
//template<>
//template<>
//void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv);
// template<>
// template<>
// void VectorBase<float>::AddVec(const float alpha, const VectorBase<float>
// &rv);
//template<>
//template<>
//void VectorBase<double>::AddVec<double>(const double alpha,
//const VectorBase<double> &rv);
// template<>
// template<>
// void VectorBase<double>::AddVec<double>(const double alpha,
// const VectorBase<double> &rv);
} // namespace kaldi

File diff suppressed because it is too large Load Diff

@ -37,7 +37,7 @@ namespace kaldi {
/// Provides a vector abstraction class.
/// This class provides a way to work with vectors in kaldi.
/// It encapsulates basic operations and memory optimizations.
template<typename Real>
template <typename Real>
class VectorBase {
public:
/// Set vector to all zeros.
@ -53,23 +53,23 @@ class VectorBase {
inline MatrixIndexT Dim() const { return dim_; }
/// Returns the size in memory of the vector, in bytes.
inline MatrixIndexT SizeInBytes() const { return (dim_*sizeof(Real)); }
inline MatrixIndexT SizeInBytes() const { return (dim_ * sizeof(Real)); }
/// Returns a pointer to the start of the vector's data.
inline Real* Data() { return data_; }
inline Real *Data() { return data_; }
/// Returns a pointer to the start of the vector's data (const).
inline const Real* Data() const { return data_; }
inline const Real *Data() const { return data_; }
/// Indexing operator (const).
inline Real operator() (MatrixIndexT i) const {
inline Real operator()(MatrixIndexT i) const {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(dim_));
return *(data_ + i);
}
/// Indexing operator (non-const).
inline Real & operator() (MatrixIndexT i) {
inline Real &operator()(MatrixIndexT i) {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(dim_));
return *(data_ + i);
@ -98,12 +98,12 @@ class VectorBase {
void CopyFromVec(const VectorBase<Real> &v);
/// Copy data from another vector of different type (double vs. float)
template<typename OtherReal>
template <typename OtherReal>
void CopyFromVec(const VectorBase<OtherReal> &v);
/// Performs a row stack of the matrix M
void CopyRowsFromMat(const MatrixBase<Real> &M);
template<typename OtherReal>
template <typename OtherReal>
void CopyRowsFromMat(const MatrixBase<OtherReal> &M);
/// Performs a column stack of the matrix M
@ -113,12 +113,12 @@ class VectorBase {
/// this->Copy(M[row]).
void CopyRowFromMat(const MatrixBase<Real> &M, MatrixIndexT row);
/// Extracts a row of the matrix M with type conversion.
template<typename OtherReal>
template <typename OtherReal>
void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row);
/// Extracts a column of the matrix M.
template<typename OtherReal>
void CopyColFromMat(const MatrixBase<OtherReal> &M , MatrixIndexT col);
template <typename OtherReal>
void CopyColFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT col);
/// Reads from C++ stream (option to add to existing contents).
/// Throws exception on failure
@ -129,19 +129,21 @@ class VectorBase {
friend class VectorBase<double>;
friend class VectorBase<float>;
protected:
/// Destructor; does not deallocate memory, this is handled by child classes.
/// Destructor; does not deallocate memory, this is handled by child
/// classes.
/// This destructor is protected so this object can only be
/// deleted via a child.
~VectorBase() {}
/// Empty initializer, corresponds to vector of zero size.
explicit VectorBase(): data_(NULL), dim_(0) {
explicit VectorBase() : data_(NULL), dim_(0) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
/// data memory area
Real* data_;
Real *data_;
/// dimension of vector
MatrixIndexT dim_;
KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase);
@ -151,25 +153,28 @@ class VectorBase {
*
* This class provides a way to work with vectors in kaldi.
* It encapsulates basic operations and memory optimizations. */
template<typename Real>
class Vector: public VectorBase<Real> {
template <typename Real>
class Vector : public VectorBase<Real> {
public:
/// Constructor that takes no arguments. Initializes to empty.
Vector(): VectorBase<Real>() {}
Vector() : VectorBase<Real>() {}
/// Constructor with specific size. Sets to all-zero by default
/// if set_zero == false, memory contents are undefined.
explicit Vector(const MatrixIndexT s,
MatrixResizeType resize_type = kSetZero)
: VectorBase<Real>() { Resize(s, resize_type); }
: VectorBase<Real>() {
Resize(s, resize_type);
}
/// Copy constructor from CUDA vector
/// This is defined in ../cudamatrix/cu-vector.h
//template<typename OtherReal>
//explicit Vector(const CuVectorBase<OtherReal> &cu);
// template<typename OtherReal>
// explicit Vector(const CuVectorBase<OtherReal> &cu);
/// Copy constructor. The need for this is controversial.
Vector(const Vector<Real> &v) : VectorBase<Real>() { // (cannot be explicit)
Vector(const Vector<Real> &v)
: VectorBase<Real>() { // (cannot be explicit)
Resize(v.Dim(), kUndefined);
this->CopyFromVec(v);
}
@ -181,19 +186,19 @@ class Vector: public VectorBase<Real> {
}
/// Type conversion constructor.
template<typename OtherReal>
explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() {
template <typename OtherReal>
explicit Vector(const VectorBase<OtherReal> &v) : VectorBase<Real>() {
Resize(v.Dim(), kUndefined);
this->CopyFromVec(v);
}
// Took this out since it is unsafe : Arnab
// /// Constructor from a pointer and a size; copies the data to a location
// /// it owns.
// Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() {
// Resize(s);
// Took this out since it is unsafe : Arnab
// /// Constructor from a pointer and a size; copies the data to a location
// /// it owns.
// Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() {
// Resize(s);
// CopyFromPtr(Data, s);
// }
// }
/// Swaps the contents of *this and *other. Shallow swap.
@ -219,59 +224,63 @@ class Vector: public VectorBase<Real> {
void RemoveElement(MatrixIndexT i);
/// Assignment operator.
Vector<Real> &operator = (const Vector<Real> &other) {
Vector<Real> &operator=(const Vector<Real> &other) {
Resize(other.Dim(), kUndefined);
this->CopyFromVec(other);
return *this;
}
/// Assignment operator that takes VectorBase.
Vector<Real> &operator = (const VectorBase<Real> &other) {
Vector<Real> &operator=(const VectorBase<Real> &other) {
Resize(other.Dim(), kUndefined);
this->CopyFromVec(other);
return *this;
}
private:
/// Init assumes the current contents of the class are invalid (i.e. junk or
/// has already been freed), and it sets the vector to newly allocated memory
/// with the specified dimension. dim == 0 is acceptable. The memory contents
/// has already been freed), and it sets the vector to newly allocated
/// memory
/// with the specified dimension. dim == 0 is acceptable. The memory
/// contents
/// pointed to by data_ will be undefined.
void Init(const MatrixIndexT dim);
/// Destroy function, called internally.
void Destroy();
};
/// Represents a non-allocating general vector which can be defined
/// as a sub-vector of higher-level vector [or as the row of a matrix].
template<typename Real>
template <typename Real>
class SubVector : public VectorBase<Real> {
public:
/// Constructor from a Vector or SubVector.
/// SubVectors are not const-safe and it's very hard to make them
/// so for now we just give up. This function contains const_cast.
SubVector(const VectorBase<Real> &t, const MatrixIndexT origin,
const MatrixIndexT length) : VectorBase<Real>() {
SubVector(const VectorBase<Real> &t,
const MatrixIndexT origin,
const MatrixIndexT length)
: VectorBase<Real>() {
// following assert equiv to origin>=0 && length>=0 &&
// origin+length <= rt.dim_
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin)+
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin) +
static_cast<UnsignedMatrixIndexT>(length) <=
static_cast<UnsignedMatrixIndexT>(t.Dim()));
VectorBase<Real>::data_ = const_cast<Real*> (t.Data()+origin);
VectorBase<Real>::data_ = const_cast<Real *>(t.Data() + origin);
VectorBase<Real>::dim_ = length;
}
/// This constructor initializes the vector to point at the contents
/// of this packed matrix (SpMatrix or TpMatrix).
// SubVector(const PackedMatrix<Real> &M) {
//VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
//VectorBase<Real>::dim_ = (M.NumRows()*(M.NumRows()+1))/2;
// VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
// VectorBase<Real>::dim_ = (M.NumRows()*(M.NumRows()+1))/2;
//}
/// Copy constructor
SubVector(const SubVector &other) : VectorBase<Real> () {
SubVector(const SubVector &other) : VectorBase<Real>() {
// this copy constructor needed for Range() to work in base class.
VectorBase<Real>::data_ = other.data_;
VectorBase<Real>::dim_ = other.dim_;
@ -280,14 +289,14 @@ class SubVector : public VectorBase<Real> {
/// Constructor from a pointer to memory and a length. Keeps a pointer
/// to the data but does not take ownership (will never delete).
/// Caution: this constructor enables you to evade const constraints.
SubVector(const Real *data, MatrixIndexT length) : VectorBase<Real> () {
VectorBase<Real>::data_ = const_cast<Real*>(data);
SubVector(const Real *data, MatrixIndexT length) : VectorBase<Real>() {
VectorBase<Real>::data_ = const_cast<Real *>(data);
VectorBase<Real>::dim_ = length;
}
/// This operation does not preserve const-ness, so be careful.
SubVector(const MatrixBase<Real> &matrix, MatrixIndexT row) {
VectorBase<Real>::data_ = const_cast<Real*>(matrix.RowData(row));
VectorBase<Real>::data_ = const_cast<Real *>(matrix.RowData(row));
VectorBase<Real>::dim_ = matrix.NumCols();
}
@ -295,7 +304,7 @@ class SubVector : public VectorBase<Real> {
private:
/// Disallow assignment operator.
SubVector & operator = (const SubVector &other) {}
SubVector &operator=(const SubVector &other) {}
};
/// @} end of "addtogroup matrix_group"
@ -303,43 +312,41 @@ class SubVector : public VectorBase<Real> {
/// @{
/// Output to a C++ stream. Non-binary by default (use Write for
/// binary output).
template<typename Real>
std::ostream & operator << (std::ostream & out, const VectorBase<Real> & v);
template <typename Real>
std::ostream &operator<<(std::ostream &out, const VectorBase<Real> &v);
/// Input from a C++ stream. Will automatically read text or
/// binary data from the stream.
template<typename Real>
std::istream & operator >> (std::istream & in, VectorBase<Real> & v);
template <typename Real>
std::istream &operator>>(std::istream &in, VectorBase<Real> &v);
/// Input from a C++ stream. Will automatically read text or
/// binary data from the stream.
template<typename Real>
std::istream & operator >> (std::istream & in, Vector<Real> & v);
template <typename Real>
std::istream &operator>>(std::istream &in, Vector<Real> &v);
/// @} end of \addtogroup matrix_funcs_io
/// \addtogroup matrix_funcs_scalar
/// @{
//template<typename Real>
//bool ApproxEqual(const VectorBase<Real> &a,
//const VectorBase<Real> &b, Real tol = 0.01) {
//return a.ApproxEqual(b, tol);
// template<typename Real>
// bool ApproxEqual(const VectorBase<Real> &a,
// const VectorBase<Real> &b, Real tol = 0.01) {
// return a.ApproxEqual(b, tol);
//}
//template<typename Real>
//inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
//float tol = 0.01) {
//KALDI_ASSERT(a.ApproxEqual(b, tol));
// template<typename Real>
// inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
// float tol = 0.01) {
// KALDI_ASSERT(a.ApproxEqual(b, tol));
//}
} // namespace kaldi
// we need to include the implementation
#include "matrix/kaldi-vector-inl.h"
#endif // KALDI_MATRIX_KALDI_VECTOR_H_

@ -27,18 +27,15 @@
namespace kaldi {
// this enums equal to CblasTrans and CblasNoTrans constants from CBLAS library
// we are writing them as literals because we don't want to include here matrix/kaldi-blas.h,
// we are writing them as literals because we don't want to include here
// matrix/kaldi-blas.h,
// which puts many symbols into global scope (like "real") via the header f2c.h
typedef enum {
kTrans = 112, // = CblasTrans
kNoTrans = 111 // = CblasNoTrans
} MatrixTransposeType;
typedef enum {
kSetZero,
kUndefined,
kCopyData
} MatrixResizeType;
typedef enum { kSetZero, kUndefined, kCopyData } MatrixResizeType;
typedef enum {
@ -53,24 +50,33 @@ typedef enum {
kTakeMeanAndCheck
} SpCopyType;
template<typename Real> class VectorBase;
template<typename Real> class Vector;
template<typename Real> class SubVector;
template<typename Real> class MatrixBase;
template<typename Real> class SubMatrix;
template<typename Real> class Matrix;
template <typename Real>
class VectorBase;
template <typename Real>
class Vector;
template <typename Real>
class SubVector;
template <typename Real>
class MatrixBase;
template <typename Real>
class SubMatrix;
template <typename Real>
class Matrix;
/// This class provides a way for switching between double and float types.
template<typename T> class OtherReal { }; // useful in reading+writing routines
template <typename T>
class OtherReal {}; // useful in reading+writing routines
// to switch double and float.
/// A specialized class for switching from float to double.
template<> class OtherReal<float> {
template <>
class OtherReal<float> {
public:
typedef double Real;
};
/// A specialized class for switching from double to float.
template<> class OtherReal<double> {
template <>
class OtherReal<double> {
public:
typedef float Real;
};
@ -81,12 +87,10 @@ typedef int32 SignedMatrixIndexT;
typedef uint32 UnsignedMatrixIndexT;
// If you want to use size_t for the index type, do as follows instead:
//typedef size_t MatrixIndexT;
//typedef ssize_t SignedMatrixIndexT;
//typedef size_t UnsignedMatrixIndexT;
// typedef size_t MatrixIndexT;
// typedef ssize_t SignedMatrixIndexT;
// typedef size_t UnsignedMatrixIndexT;
}
#endif // KALDI_MATRIX_MATRIX_COMMON_H_

@ -1,14 +1,15 @@
project(kaldi)
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
)
add_subdirectory(base)
add_subdirectory(util)
add_subdirectory(lat)
add_subdirectory(fstext)
add_subdirectory(decoder)
add_subdirectory(lm)
if(WITH_ASR)
add_subdirectory(lat)
add_subdirectory(fstext)
add_subdirectory(decoder)
add_subdirectory(lm)
add_subdirectory(fstbin)
add_subdirectory(lmbin)
add_subdirectory(fstbin)
add_subdirectory(lmbin)
endif()

@ -44,7 +44,19 @@ typedef float BaseFloat;
#ifndef COMPILE_WITHOUT_OPENFST
#ifdef WITH_ASR
#include <fst/types.h>
#else
using int8 = int8_t;
using int16 = int16_t;
using int32 = int32_t;
using int64 = int64_t;
using uint8 = uint8_t;
using uint16 = uint16_t;
using uint32 = uint32_t;
using uint64 = uint64_t;
#endif
namespace kaldi {
using ::int16;

@ -0,0 +1,18 @@
# set(CMAKE_CXX_STANDARD 11)
# # fastdeploy
# set(FASTDEPLOY_INSTALL_DIR "fdlib/fastdeploy-linux-x64-1.0.4" CACHE STRING force)
# if(NOT EXISTS ${FASTDEPLOY_INSTALL_DIR})
# message(FATAL_ERROR "Please using cmake -B build -DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR}")
# endif()
# include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# # FastDeploy
# include_directories(${FASTDEPLOY_INCS})
add_executable(infer_onnx_silero_vad ${CMAKE_CURRENT_SOURCE_DIR}/infer_onnx_silero_vad.cc wav.h vad.cc vad.h)
# FastDeploy
target_link_libraries(infer_onnx_silero_vad ${FASTDEPLOY_LIBS})

@ -0,0 +1,121 @@
English | [简体中文](README_CN.md)
# Silero VAD Deployment Example
This directory provides examples that `infer_onnx_silero_vad` fast finishes the deployment of VAD models on CPU/GPU.
Before deployment, two steps require confirmation.
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following command in this directory.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the VAD model file and test audio. After decompression, place the model and test audio in the infer_onnx_silero_vad.cc peer directory
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
# inference
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```
- The above command works for Linux or MacOS. Refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md) for SDK use-pattern in Windows
## VAD C++ Interface
### Vad Class
```c++
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```
**Parameter**
> * **model_file**(str): Model file path
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default. (use the default configuration)
### setAudioCofig function
**Must be called before the `init` function**
```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```
**Parameter**
> * **sr**(int): sampling rate
> * **frame_ms**(int): The length of each detection frame, and it is used to calculate the detection window size
> * **threshold**(float): Result probability judgment threshold
> * **min_silence_duration_ms**(int): The threshold used to calculate whether it is silence
> * **speech_pad_ms**(int): Used to calculate the end time of the speech
### init function
Used to initialize audio-related parameters.
```c++
void Vad::init();
```
### loadAudio function
Load audio.
```c++
void Vad::loadAudio(const std::string& wavPath)
```
**Parameter**
> * **wavPath**(str): Audio file path
### Predict function
Used to start model reasoning.
```c++
bool Vad::Predict();
```
### getResult function
**Used to obtain reasoning results**
```c++
std::vector<std::map<std::string, float>> Vad::getResult(
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
float mergeThreshold = 0.3);
```
**Parameter**
> * **removeThreshold**(float): Discard result fragment threshold; If some recognition results are too short, they will be discarded according to this threshold
> * **expandHeadThreshold**(float): Offset at the beginning of the segment; The recognized start time may be too close to the voice part, so move forward the start time accordingly
> * **expandTailThreshold**(float): Offset at the end of the segment; The recognized end time may be too close to the voice part, so the end time is moved back accordingly
> * **mergeThreshold**(float): Some result segments are very close and can be combined into one, and the vocal segments can be combined accordingly
**The output result format is**`std::vector<std::map<std::string, float>>`
> Output a list, each element is a speech fragment
>
> Each clip can use 'start' to get the start time and 'end' to get the end time
### Tips
1. `The setAudioCofig`function must be called before the `init` function
2. The sampling rate of the input audio file must be consistent with that set in the code
- [Model Description](../)
- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md)

@ -0,0 +1,119 @@
[English](README.md) | 简体中文
# Silero VAD 部署示例
本目录下提供`infer_onnx_silero_vad`快速完成 Silero VAD 模型在CPU/GPU。
在部署前,需确认以下两个步骤
- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. 根据开发环境下载预编译部署库和samples代码参考[FastDeploy预编译库](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
以Linux上 VAD 推理为例,在本目录执行如下命令即可完成编译测试。
```bash
mkdir build
cd build
# 下载FastDeploy预编译库用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# 下载 VAD 模型文件和测试音频,解压后将模型和测试音频放置在与 infer_onnx_silero_vad.cc 同级目录下
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
# 推理
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```
以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考:
- [如何在Windows中使用FastDeploy C++ SDK](../../../../docs/cn/faq/use_sdk_on_windows.md)
## VAD C++ 接口
### Vad 类
```c++
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```
**参数**
> * **model_file**(str): 模型文件路径
> * **runtime_option**(RuntimeOption): 后端推理配置默认为None即采用默认配置
### setAudioCofig 函数
**必须在`init`函数前调用**
```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```
**参数**
> * **sr**(int): 采样率
> * **frame_ms**(int): 每次检测帧长,用于计算检测窗口大小
> * **threshold**(float): 结果概率判断阈值
> * **min_silence_duration_ms**(int): 用于计算判断是否是 silence 的阈值
> * **speech_pad_ms**(int): 用于计算 speach 结束时刻
### init 函数
用于初始化音频相关参数
```c++
void Vad::init();
```
### loadAudio 函数
加载音频
```c++
void Vad::loadAudio(const std::string& wavPath)
```
**参数**
> * **wavPath**(str): 音频文件路径
### Predict 函数
用于开始模型推理
```c++
bool Vad::Predict();
```
### getResult 函数
**用于获取推理结果**
```c++
std::vector<std::map<std::string, float>> Vad::getResult(
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
float mergeThreshold = 0.3);
```
**参数**
> * **removeThreshold**(float): 丢弃结果片段阈值;部分识别结果太短则根据此阈值丢弃
> * **expandHeadThreshold**(float): 结果片段开始时刻偏移;识别到的开始时刻可能过于贴近发声部分,因此据此前移开始时刻
> * **expandTailThreshold**(float): 结果片段结束时刻偏移;识别到的结束时刻可能过于贴近发声部分,因此据此后移结束时刻
> * **mergeThreshold**(float): 有的结果片段十分靠近,可以合并成一个,据此合并发声片段
**输出结果格式为**`std::vector<std::map<std::string, float>>`
> 输出一个列表,每个元素是一个讲话片段
>
> 每个片段可以用 'start' 获取到开始时刻,用 'end' 获取到结束时刻
### 提示
1. `setAudioCofig`函数必须在`init`函数前调用
2. 输入的音频文件的采样率必须与代码中设置的保持一致
- [模型介绍](../)
- [如何切换模型推理后端引擎](../../../../docs/cn/faq/how_to_change_backend.md)

@ -0,0 +1,65 @@
#include "vad.h"
int main(int argc, char* argv[]) {
if (argc < 3) {
std::cout << "Usage: infer_onnx_silero_vad path/to/model path/to/audio "
"run_option, "
"e.g ./infer_onnx_silero_vad silero_vad.onnx sample.wav"
<< std::endl;
return -1;
}
std::string model_file = argv[1];
std::string audio_file = argv[2];
int sr = 16000;
Vad vad(model_file);
// custom config, but must be set before init
vad.SetConfig(sr, 32, 0.45f, 200, 0, 0);
vad.Init();
std::vector<float> inputWav; // [0, 1]
wav::WavReader wav_reader = wav::WavReader(audio_file);
assert(wav_reader.sample_rate() == sr);
auto num_samples = wav_reader.num_samples();
inputWav.resize(num_samples);
for (int i = 0; i < num_samples; i++) {
inputWav[i] = wav_reader.data()[i] / 32768;
}
int window_size_samples = vad.WindowSizeSamples();
for (int64_t j = 0; j < num_samples; j += window_size_samples) {
auto start = j;
auto end = start + window_size_samples >= num_samples
? num_samples
: start + window_size_samples;
auto current_chunk_size = end - start;
std::vector<float> r{&inputWav[0] + start, &inputWav[0] + end};
assert(r.size() == current_chunk_size);
if (!vad.ForwardChunk(r)) {
std::cerr << "Failed to inference while using model:"
<< vad.ModelName() << "." << std::endl;
return false;
}
Vad::State s = vad.Postprocess();
std::cout << s << " ";
}
std::cout << std::endl;
std::vector<std::map<std::string, float>> result = vad.GetResult();
for (auto& res : result) {
std::cout << "speak start: " << res["start"]
<< " s, end: " << res["end"] << " s | ";
}
std::cout << "\b\b " << std::endl;
vad.Reset();
return 0;
}

@ -0,0 +1,306 @@
// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "vad.h"
#include <cstring>
#include <iomanip>
#ifdef NDEBUG
#define LOG_DEBUG \
::fastdeploy::FDLogger(true, "[DEBUG]") << __REL_FILE__ << "(" << __LINE__ \
<< ")::" << __FUNCTION__ << "\t"
#else
#define LOG_DEBUG \
::fastdeploy::FDLogger(false, "[DEBUG]") \
<< __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t"
#endif
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption&
custom_option /* = fastdeploy::RuntimeOption() */) {
valid_cpu_backends = {fastdeploy::Backend::ORT,
fastdeploy::Backend::OPENVINO};
valid_gpu_backends = {fastdeploy::Backend::ORT, fastdeploy::Backend::TRT};
runtime_option = custom_option;
// ORT backend
runtime_option.UseCpu();
runtime_option.UseOrtBackend();
runtime_option.model_format = fastdeploy::ModelFormat::ONNX;
// grap opt level
runtime_option.ort_option.graph_optimization_level = 99;
// one-thread
runtime_option.ort_option.intra_op_num_threads = 1;
runtime_option.ort_option.inter_op_num_threads = 1;
// model path
runtime_option.model_file = model_file;
}
void Vad::Init() {
std::call_once(init_, [&]() { initialized = Initialize(); });
}
std::string Vad::ModelName() const { return "VAD"; }
void Vad::SetConfig(int sr,
int frame_ms,
float threshold,
int min_silence_duration_ms,
int speech_pad_left_ms,
int speech_pad_right_ms) {
if (initialized) {
fastdeploy::FDERROR << "SetConfig must be called before init"
<< std::endl;
throw std::runtime_error("SetConfig must be called before init");
}
sample_rate_ = sr;
sr_per_ms_ = sr / 1000;
threshold_ = threshold;
frame_ms_ = frame_ms;
min_silence_samples_ = min_silence_duration_ms * sr_per_ms_;
speech_pad_left_samples_ = speech_pad_left_ms * sr_per_ms_;
speech_pad_right_samples_ = speech_pad_right_ms * sr_per_ms_;
// init chunk size
window_size_samples_ = frame_ms * sr_per_ms_;
current_chunk_size_ = window_size_samples_;
fastdeploy::FDINFO << "sr=" << sr << " threshold=" << threshold
<< " frame_ms=" << frame_ms
<< " min_silence_duration_ms=" << min_silence_duration_ms
<< " speech_pad_left_ms=" << speech_pad_left_ms
<< " speech_pad_right_ms=" << speech_pad_right_ms;
}
void Vad::Reset() {
std::memset(h_.data(), 0.0f, h_.size() * sizeof(float));
std::memset(c_.data(), 0.0f, c_.size() * sizeof(float));
triggerd_ = false;
temp_end_ = 0;
current_sample_ = 0;
speakStart_.clear();
speakEnd_.clear();
states_.clear();
}
bool Vad::Initialize() {
// input & output holder
inputTensors_.resize(4);
outputTensors_.resize(3);
// input shape
input_node_dims_.emplace_back(1);
input_node_dims_.emplace_back(window_size_samples_);
// sr buffer
sr_.resize(1);
sr_[0] = sample_rate_;
// hidden state buffer
h_.resize(size_hc_);
c_.resize(size_hc_);
Reset();
// InitRuntime
if (!InitRuntime()) {
fastdeploy::FDERROR << "Failed to initialize fastdeploy backend."
<< std::endl;
return false;
}
fastdeploy::FDINFO << "init done.";
return true;
}
bool Vad::ForwardChunk(std::vector<float>& chunk) {
// last chunk may not be window_size_samples_
input_node_dims_.back() = chunk.size();
assert(window_size_samples_ >= chunk.size());
current_chunk_size_ = chunk.size();
inputTensors_[0].name = "input";
inputTensors_[0].SetExternalData(
input_node_dims_, fastdeploy::FDDataType::FP32, chunk.data());
inputTensors_[1].name = "sr";
inputTensors_[1].SetExternalData(
sr_node_dims_, fastdeploy::FDDataType::INT64, sr_.data());
inputTensors_[2].name = "h";
inputTensors_[2].SetExternalData(
hc_node_dims_, fastdeploy::FDDataType::FP32, h_.data());
inputTensors_[3].name = "c";
inputTensors_[3].SetExternalData(
hc_node_dims_, fastdeploy::FDDataType::FP32, c_.data());
if (!Infer(inputTensors_, &outputTensors_)) {
return false;
}
// Push forward sample index
current_sample_ += current_chunk_size_;
return true;
}
const Vad::State& Vad::Postprocess() {
// update prob, h, c
outputProb_ = *(float*)outputTensors_[0].Data();
auto* hn = static_cast<float*>(outputTensors_[1].MutableData());
std::memcpy(h_.data(), hn, h_.size() * sizeof(float));
auto* cn = static_cast<float*>(outputTensors_[2].MutableData());
std::memcpy(c_.data(), cn, c_.size() * sizeof(float));
if (outputProb_ < threshold_ && !triggerd_) {
// 1. Silence
LOG_DEBUG << "{ silence: " << 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
states_.emplace_back(Vad::State::SIL);
} else if (outputProb_ >= threshold_ && !triggerd_) {
// 2. Start
triggerd_ = true;
speech_start_ =
current_sample_ - current_chunk_size_ - speech_pad_left_samples_;
float start_sec = 1.0 * speech_start_ / sample_rate_;
speakStart_.emplace_back(start_sec);
LOG_DEBUG << "{ speech start: " << start_sec
<< " s; prob: " << outputProb_ << " }";
states_.emplace_back(Vad::State::START);
} else if (outputProb_ >= threshold_ - 0.15 && triggerd_) {
// 3. Continue
if (temp_end_ != 0) {
// speech prob relaxation, speech continues again
LOG_DEBUG << "{ speech fake end(sil < min_silence_ms) to continue: "
<< 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
temp_end_ = 0;
} else {
// speech prob relaxation, keep tracking speech
LOG_DEBUG << "{ speech continue: "
<< 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
}
states_.emplace_back(Vad::State::SPEECH);
} else if (outputProb_ < threshold_ - 0.15 && triggerd_) {
// 4. End
if (temp_end_ == 0) {
temp_end_ = current_sample_;
}
// check possible speech end
if (current_sample_ - temp_end_ < min_silence_samples_) {
// a. silence < min_slience_samples, continue speaking
LOG_DEBUG << "{ speech fake end(sil < min_silence_ms): "
<< 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
states_.emplace_back(Vad::State::SIL);
} else {
// b. silence >= min_slience_samples, end speaking
speech_end_ = current_sample_ + speech_pad_right_samples_;
temp_end_ = 0;
triggerd_ = false;
auto end_sec = 1.0 * speech_end_ / sample_rate_;
speakEnd_.emplace_back(end_sec);
LOG_DEBUG << "{ speech end: " << end_sec
<< " s; prob: " << outputProb_ << " }";
states_.emplace_back(Vad::State::END);
}
}
return states_.back();
}
const std::vector<std::map<std::string, float>> Vad::GetResult(
float removeThreshold,
float expandHeadThreshold,
float expandTailThreshold,
float mergeThreshold) const {
float audioLength = 1.0 * current_sample_ / sample_rate_;
if (speakStart_.empty() && speakEnd_.empty()) {
return {};
}
if (speakEnd_.size() != speakStart_.size()) {
// set the audio length as the last end
speakEnd_.emplace_back(audioLength);
}
// Remove too short segments
// auto startIter = speakStart_.begin();
// auto endIter = speakEnd_.begin();
// while (startIter != speakStart_.end()) {
// if (removeThreshold < audioLength &&
// *endIter - *startIter < removeThreshold) {
// startIter = speakStart_.erase(startIter);
// endIter = speakEnd_.erase(endIter);
// } else {
// startIter++;
// endIter++;
// }
// }
// // Expand to avoid to tight cut.
// startIter = speakStart_.begin();
// endIter = speakEnd_.begin();
// *startIter = std::fmax(0.f, *startIter - expandHeadThreshold);
// *endIter = std::fmin(*endIter + expandTailThreshold, *(startIter + 1));
// endIter = speakEnd_.end() - 1;
// startIter = speakStart_.end() - 1;
// *startIter = fmax(*startIter - expandHeadThreshold, *(endIter - 1));
// *endIter = std::fmin(*endIter + expandTailThreshold, audioLength);
// for (int i = 1; i < speakStart_.size() - 1; ++i) {
// speakStart_[i] = std::fmax(speakStart_[i] - expandHeadThreshold,
// speakEnd_[i - 1]);
// speakEnd_[i] = std::fmin(speakEnd_[i] + expandTailThreshold,
// speakStart_[i + 1]);
// }
// // Merge very closed segments
// startIter = speakStart_.begin() + 1;
// endIter = speakEnd_.begin();
// while (startIter != speakStart_.end()) {
// if (*startIter - *endIter < mergeThreshold) {
// startIter = speakStart_.erase(startIter);
// endIter = speakEnd_.erase(endIter);
// } else {
// startIter++;
// endIter++;
// }
// }
std::vector<std::map<std::string, float>> result;
for (int i = 0; i < speakStart_.size(); ++i) {
result.emplace_back(std::map<std::string, float>(
{{"start", speakStart_[i]}, {"end", speakEnd_[i]}}));
}
return result;
}
std::ostream& operator<<(std::ostream& os, const Vad::State& s) {
switch (s) {
case Vad::State::SIL:
os << "[SIL]";
break;
case Vad::State::START:
os << "[STA]";
break;
case Vad::State::SPEECH:
os << "[SPE]";
break;
case Vad::State::END:
os << "[END]";
break;
default:
// illegal state
os << "[ILL]";
break;
}
return os;
}

@ -0,0 +1,124 @@
// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <mutex>
#include <vector>
#include "./wav.h"
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/runtime.h"
class Vad : public fastdeploy::FastDeployModel {
public:
enum class State { SIL = 0, START, SPEECH, END };
friend std::ostream& operator<<(std::ostream& os, const Vad::State& s);
Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option =
fastdeploy::RuntimeOption());
void Init();
void Reset();
void SetConfig(int sr,
int frame_ms,
float threshold,
int min_silence_duration_ms,
int speech_pad_left_ms,
int speech_pad_right_ms);
bool ForwardChunk(std::vector<float>& chunk);
const State& Postprocess();
const std::vector<std::map<std::string, float>> GetResult(
float removeThreshold = 0.0,
float expandHeadThreshold = 0.0,
float expandTailThreshold = 0,
float mergeThreshold = 0.0) const;
const std::vector<State> GetStates() const { return states_; }
int SampleRate() const { return sample_rate_; }
int FrameMs() const { return frame_ms_; }
int64_t WindowSizeSamples() const { return window_size_samples_; }
float Threshold() const { return threshold_; }
int MinSilenceDurationMs() const {
return min_silence_samples_ / sample_rate_;
}
int SpeechPadLeftMs() const {
return speech_pad_left_samples_ / sample_rate_;
}
int SpeechPadRightMs() const {
return speech_pad_right_samples_ / sample_rate_;
}
int MinSilenceSamples() const { return min_silence_samples_; }
int SpeechPadLeftSamples() const { return speech_pad_left_samples_; }
int SpeechPadRightSamples() const { return speech_pad_right_samples_; }
std::string ModelName() const override;
private:
bool Initialize();
private:
std::once_flag init_;
// input and output
std::vector<fastdeploy::FDTensor> inputTensors_;
std::vector<fastdeploy::FDTensor> outputTensors_;
// model states
bool triggerd_ = false;
unsigned int speech_start_ = 0;
unsigned int speech_end_ = 0;
unsigned int temp_end_ = 0;
unsigned int current_sample_ = 0;
unsigned int current_chunk_size_ = 0;
// MAX 4294967295 samples / 8sample per ms / 1000 / 60 = 8947 minutes
float outputProb_;
std::vector<float> speakStart_;
mutable std::vector<float> speakEnd_;
std::vector<State> states_;
/* ========================================================================
*/
int sample_rate_ = 16000;
int frame_ms_ = 32; // 32, 64, 96 for 16k
float threshold_ = 0.5f;
int64_t window_size_samples_; // support 256 512 768 for 8k; 512 1024 1536
// for 16k.
int sr_per_ms_; // support 8 or 16
int min_silence_samples_; // sr_per_ms_ * frame_ms_
int speech_pad_left_samples_{0}; // usually 250ms
int speech_pad_right_samples_{0}; // usually 0
/* ========================================================================
*/
std::vector<int64_t> sr_;
const size_t size_hc_ = 2 * 1 * 64; // It's FIXED.
std::vector<float> h_;
std::vector<float> c_;
std::vector<int64_t> input_node_dims_;
const std::vector<int64_t> sr_node_dims_ = {1};
const std::vector<int64_t> hc_node_dims_ = {2, 1, 64};
};

@ -0,0 +1,197 @@
// Copyright (c) 2016 Personal (Binbin Zhang)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
namespace wav {
struct WavHeader {
char riff[4]; // "riff"
unsigned int size;
char wav[4]; // "WAVE"
char fmt[4]; // "fmt "
unsigned int fmt_size;
uint16_t format;
uint16_t channels;
unsigned int sample_rate;
unsigned int bytes_per_second;
uint16_t block_size;
uint16_t bit;
char data[4]; // "data"
unsigned int data_size;
};
class WavReader {
public:
WavReader() : data_(nullptr) {}
explicit WavReader(const std::string& filename) { Open(filename); }
bool Open(const std::string& filename) {
FILE* fp = fopen(filename.c_str(), "rb");
if (NULL == fp) {
std::cout << "Error in read " << filename;
return false;
}
WavHeader header;
fread(&header, 1, sizeof(header), fp);
if (header.fmt_size < 16) {
fprintf(stderr,
"WaveData: expect PCM format data "
"to have fmt chunk of at least size 16.\n");
return false;
} else if (header.fmt_size > 16) {
int offset = 44 - 8 + header.fmt_size - 16;
fseek(fp, offset, SEEK_SET);
fread(header.data, 8, sizeof(char), fp);
}
// check "riff" "WAVE" "fmt " "data"
// Skip any sub-chunks between "fmt" and "data". Usually there will
// be a single "fact" sub chunk, but on Windows there can also be a
// "list" sub chunk.
while (0 != strncmp(header.data, "data", 4)) {
// We will just ignore the data in these chunks.
fseek(fp, header.data_size, SEEK_CUR);
// read next sub chunk
fread(header.data, 8, sizeof(char), fp);
}
num_channel_ = header.channels;
sample_rate_ = header.sample_rate;
bits_per_sample_ = header.bit;
int num_data = header.data_size / (bits_per_sample_ / 8);
data_ = new float[num_data]; // Create 1-dim array
num_samples_ = num_data / num_channel_;
for (int i = 0; i < num_data; ++i) {
switch (bits_per_sample_) {
case 8: {
char sample;
fread(&sample, 1, sizeof(char), fp);
data_[i] = static_cast<float>(sample);
break;
}
case 16: {
int16_t sample;
fread(&sample, 1, sizeof(int16_t), fp);
// std::cout << sample;
data_[i] = static_cast<float>(sample);
// std::cout << data_[i];
break;
}
case 32: {
int sample;
fread(&sample, 1, sizeof(int), fp);
data_[i] = static_cast<float>(sample);
break;
}
default:
fprintf(stderr, "unsupported quantization bits");
exit(1);
}
}
fclose(fp);
return true;
}
int num_channel() const { return num_channel_; }
int sample_rate() const { return sample_rate_; }
int bits_per_sample() const { return bits_per_sample_; }
int num_samples() const { return num_samples_; }
const float* data() const { return data_; }
private:
int num_channel_;
int sample_rate_;
int bits_per_sample_;
int num_samples_; // sample points per channel
float* data_;
};
class WavWriter {
public:
WavWriter(const float* data,
int num_samples,
int num_channel,
int sample_rate,
int bits_per_sample)
: data_(data),
num_samples_(num_samples),
num_channel_(num_channel),
sample_rate_(sample_rate),
bits_per_sample_(bits_per_sample) {}
void Write(const std::string& filename) {
FILE* fp = fopen(filename.c_str(), "w");
// init char 'riff' 'WAVE' 'fmt ' 'data'
WavHeader header;
char wav_header[44] = {
0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57, 0x41, 0x56,
0x45, 0x66, 0x6d, 0x74, 0x20, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00};
memcpy(&header, wav_header, sizeof(header));
header.channels = num_channel_;
header.bit = bits_per_sample_;
header.sample_rate = sample_rate_;
header.data_size = num_samples_ * num_channel_ * (bits_per_sample_ / 8);
header.size = sizeof(header) - 8 + header.data_size;
header.bytes_per_second =
sample_rate_ * num_channel_ * (bits_per_sample_ / 8);
header.block_size = num_channel_ * (bits_per_sample_ / 8);
fwrite(&header, 1, sizeof(header), fp);
for (int i = 0; i < num_samples_; ++i) {
for (int j = 0; j < num_channel_; ++j) {
switch (bits_per_sample_) {
case 8: {
char sample =
static_cast<char>(data_[i * num_channel_ + j]);
fwrite(&sample, 1, sizeof(sample), fp);
break;
}
case 16: {
int16_t sample =
static_cast<int16_t>(data_[i * num_channel_ + j]);
fwrite(&sample, 1, sizeof(sample), fp);
break;
}
case 32: {
int sample =
static_cast<int>(data_[i * num_channel_ + j]);
fwrite(&sample, 1, sizeof(sample), fp);
break;
}
}
}
}
fclose(fp);
}
private:
const float* data_;
int num_samples_; // total float points in data_
int num_channel_;
int sample_rate_;
int bits_per_sample_;
};
} // namespace wav
Loading…
Cancel
Save