opt to compile asr,cls,vad; add vad; format code ()

pull/2993/head
Hui Zhang 2 years ago committed by GitHub
parent 78e29c8ec4
commit b35fc01a3a

@@ -1,3 +1,6 @@
+engine/common/base/flags.h
+engine/common/base/log.h
 tools/valgrind*
 *log
 fc_patch/*

@@ -20,8 +20,7 @@ project(paddlespeech VERSION 0.1)
 set(CMAKE_VERBOSE_MAKEFILE on)
-# set std-14
-set(CMAKE_CXX_STANDARD 14)
 include(FetchContent)
 include(ExternalProject)
@@ -31,15 +30,28 @@ set(FETCHCONTENT_QUIET off)
 get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}")
 set(FETCHCONTENT_BASE_DIR ${fc_patch})
+set(CMAKE_CXX_FLAGS)
+set(CMAKE_CXX_FLAGS_DEBUG)
+set(CMAKE_CXX_FLAGS_RELEASE)
+# set std-14
+set(CMAKE_CXX_STANDARD 14)
 # compiler option
 # Keep the same with openfst, -fPIC or -fpic
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ldl")
 SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ggdb")
 SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall")
-add_compile_options(-fPIC)
 ###############################################################################
 # Option Configurations
 ###############################################################################
+option(WITH_ASR "build asr" ON)
+option(WITH_CLS "build cls" ON)
+option(WITH_VAD "build vad" ON)
 option(TEST_DEBUG "option for debug" OFF)
 option(USE_PROFILING "enable c++ profling" OFF)
 option(WITH_TESTING "unit test" ON)
@@ -47,31 +59,40 @@ option(WITH_TESTING "unit test" ON)
 option(USING_GPU "u2 compute on GPU." OFF)
 ###############################################################################
-# Include third party
+# Include Third Party
 ###############################################################################
 include(gflags)
 include(glog)
-# openfst
-include(openfst)
-add_dependencies(openfst gflags glog)
-# paddle lib
-include(paddleinference)
 # gtest
 if(WITH_TESTING)
 include(gtest) # download, build, install gtest
 endif()
+# fastdeploy
+include(fastdeploy)
+if(WITH_ASR)
+# openfst
+include(openfst)
+add_dependencies(openfst gflags glog)
+endif()
+###############################################################################
+# Find Package
+###############################################################################
 # python/pybind11/threads
 find_package(Threads REQUIRED)
 # https://cmake.org/cmake/help/latest/module/FindPython3.html#module:FindPython3
 find_package(Python3 COMPONENTS Interpreter Development)
 find_package(pybind11 CONFIG)
-if(Python3_FOUND)
+if(WITH_ASR)
+if(Python3_FOUND)
 message(STATUS "Python3_FOUND = ${Python3_FOUND}")
 message(STATUS "Python3_EXECUTABLE = ${Python3_EXECUTABLE}")
 message(STATUS "Python3_LIBRARIES = ${Python3_LIBRARIES}")
@@ -79,70 +100,76 @@ if(Python3_FOUND)
 message(STATUS "Python3_LINK_OPTIONS = ${Python3_LINK_OPTIONS}")
 set(PYTHON_LIBRARIES ${Python3_LIBRARIES} CACHE STRING "python lib" FORCE)
 set(PYTHON_INCLUDE_DIR ${Python3_INCLUDE_DIRS} CACHE STRING "python inc" FORCE)
 endif()
 message(STATUS "PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}")
 message(STATUS "PYTHON_INCLUDE_DIR = ${PYTHON_INCLUDE_DIR}")
 if(pybind11_FOUND)
 message(STATUS "pybind11_INCLUDES = ${pybind11_INCLUDE_DIRS}")
 message(STATUS "pybind11_LIBRARIES=${pybind11_LIBRARIES}")
 message(STATUS "pybind11_DEFINITIONS=${pybind11_DEFINITIONS}")
 endif()
 # paddle libpaddle.so
 # paddle include and link option
 # -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
 execute_process(
 COMMAND python -c "\
 import os;\
 import paddle;\
 include_dir=paddle.sysconfig.get_include();\
 paddle_dir=os.path.split(include_dir)[0];\
 libs_dir=os.path.join(paddle_dir, 'libs');\
 fluid_dir=os.path.join(paddle_dir, 'fluid');\
 out=' '.join([\"-L\" + libs_dir, \"-L\" + fluid_dir]);\
 out += \" -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so\"; print(out);\
 "
 OUTPUT_VARIABLE PADDLE_LINK_FLAGS
 RESULT_VARIABLE SUCESS)
 message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS})
 string(STRIP ${PADDLE_LINK_FLAGS} PADDLE_LINK_FLAGS)
 # paddle compile option
 # -I/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/include
 execute_process(
 COMMAND python -c "\
 import paddle; \
 include_dir = paddle.sysconfig.get_include(); \
 print(f\"-I{include_dir}\"); \
 "
 OUTPUT_VARIABLE PADDLE_COMPILE_FLAGS)
 message(STATUS PADDLE_COMPILE_FLAGS= ${PADDLE_COMPILE_FLAGS})
 string(STRIP ${PADDLE_COMPILE_FLAGS} PADDLE_COMPILE_FLAGS)
 # for LD_LIBRARY_PATH
 # set(PADDLE_LIB_DIRS /workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid:/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/libs/)
 execute_process(
 COMMAND python -c "\
 import os; \
 import paddle; \
 include_dir=paddle.sysconfig.get_include(); \
 paddle_dir=os.path.split(include_dir)[0]; \
 libs_dir=os.path.join(paddle_dir, 'libs'); \
 fluid_dir=os.path.join(paddle_dir, 'fluid'); \
 out=':'.join([libs_dir, fluid_dir]); print(out); \
 "
 OUTPUT_VARIABLE PADDLE_LIB_DIRS)
 message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
+endif()
+add_compile_options(-fPIC)
 ###############################################################################
 # Add local library
 ###############################################################################
 set(ENGINE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/engine)
+message(STATUS "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}")
+message(STATUS "CMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}")
+message(STATUS "CMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}")
 add_subdirectory(engine)

@@ -4,5 +4,5 @@ set -xe
 # the build script had verified in the paddlepaddle docker image.
 # please follow the instruction below to install PaddlePaddle image.
 # https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html
-cmake -B build
+cmake -B build -DWITH_ASR=OFF -DWITH_CLS=OFF
 cmake --build build -j

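The three WITH_* options introduced above default to ON; the updated build.sh simply switches ASR and CLS off so only the VAD path is compiled. A minimal configure-and-build sketch along the same lines (assuming the same Linux toolchain and working directory the script targets; -DWITH_VAD=ON is already the default and is spelled out only for clarity):

# VAD-only build: turn ASR and CLS off, keep VAD (default ON)
cmake -B build -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON
cmake --build build -j
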
@@ -8,11 +8,11 @@ windows_x86")
 set(CMAKE_VERBOSE_MAKEFILE ON)
 set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy)
-if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz)
+if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz)
 exec_program("mkdir -p ${FASTDEPLOY_DIR} &&
-wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz -P ${FASTDEPLOY_DIR} &&
-tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz -C ${FASTDEPLOY_DIR} &&
-mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2 ${FASTDEPLOY_DIR}/linux-x64")
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz -P ${FASTDEPLOY_DIR} &&
+tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz -C ${FASTDEPLOY_DIR} &&
+mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4 ${FASTDEPLOY_DIR}/linux-x64")
 endif()
 if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz)
@@ -36,4 +36,9 @@ elseif (ARCH STREQUAL "android_armv7")
 endif()
 include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+# fix compiler flags conflict, since fastdeploy using c++11 for project
+set(CMAKE_CXX_STANDARD 14)
 include_directories(${FASTDEPLOY_INCS})
+message(STATUS "FASTDEPLOY_INCS=${FASTDEPLOY_INCS}")

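Because the download step only runs when the 1.0.4 tarball is missing, the SDK can also be fetched by hand before configuring. A sketch that mirrors the exec_program sequence above (assuming the commands run from the directory holding the top-level CMakeLists.txt, so the paths match ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy):

mkdir -p fc_patch/fastdeploy
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz -P fc_patch/fastdeploy
tar xzvf fc_patch/fastdeploy/fastdeploy-linux-x64-1.0.4.tgz -C fc_patch/fastdeploy
mv fc_patch/fastdeploy/fastdeploy-linux-x64-1.0.4 fc_patch/fastdeploy/linux-x64
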
@@ -6,8 +6,19 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kaldi)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common)
-add_subdirectory(asr)
-add_subdirectory(common)
 add_subdirectory(kaldi)
+add_subdirectory(common)
+if(WITH_ASR)
+add_subdirectory(asr)
+endif()
+if(WITH_CLS)
+add_subdirectory(cls)
+endif()
+if(WITH_VAD)
+add_subdirectory(vad)
+endif()
 add_subdirectory(codelab)
-add_subdirectory(cls)

@@ -38,7 +38,8 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
 decoder_ = std::make_unique<CTCPrefixBeamSearch>(
 resource.vocab_path, resource.decoder_opts.ctc_prefix_search_opts);
 } else {
-decoder_ = std::make_unique<TLGDecoder>(resource.decoder_opts.tlg_decoder_opts);
+decoder_ = std::make_unique<TLGDecoder>(
+resource.decoder_opts.tlg_decoder_opts);
 }
 symbol_table_ = decoder_->WordSymbolTable();

@@ -3,7 +3,7 @@ ${CMAKE_CURRENT_SOURCE_DIR}
 ${CMAKE_CURRENT_SOURCE_DIR}/../
 )
 add_subdirectory(utils)
+add_subdirectory(base)
 add_subdirectory(matrix)
 include_directories(

@@ -0,0 +1,20 @@
+if(WITH_ASR)
+add_compile_options(-DWITH_ASR)
+set(PPS_FLAGS_LIB "fst/flags.h")
+set(PPS_GLOB_LIB "fst/log.h")
+else()
+set(PPS_FLAGS_LIB "gflags/gflags.h")
+set(PPS_GLOB_LIB "glog/logging.h")
+endif()
+configure_file(
+${CMAKE_CURRENT_SOURCE_DIR}/flags.h.in
+${CMAKE_CURRENT_SOURCE_DIR}/flags.h @ONLY
+)
+message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/flags.h")
+configure_file(
+${CMAKE_CURRENT_SOURCE_DIR}/log.h.in
+${CMAKE_CURRENT_SOURCE_DIR}/log.h @ONLY
+)
+message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/log.h")

@@ -14,4 +14,4 @@
 #pragma once
-#include "fst/flags.h"
+#include "@PPS_FLAGS_LIB@"

@@ -14,4 +14,4 @@
 #pragma once
-#include "fst/log.h"
+#include "@PPS_GLOB_LIB@"

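The configure_file calls above bake the selected backend into engine/common/base/flags.h and log.h at configure time: with WITH_ASR=ON the placeholders resolve to fst/flags.h and fst/log.h, otherwise to gflags/gflags.h and glog/logging.h. A quick sanity check after configuring (a sketch; the header paths follow the .gitignore entries added at the top of this diff, since the generated files live in the source tree):

grep '#include' engine/common/base/flags.h engine/common/base/log.h
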
@@ -33,7 +33,7 @@ CMVN::CMVN(std::string cmvn_file, unique_ptr<FrontendInterface> base_extractor)
 dim_ = mean_stats_.size() - 1;
 }
-void CMVN::ReadCMVNFromJson(string cmvn_file) {
+void CMVN::ReadCMVNFromJson(std::string cmvn_file) {
 std::string json_str = ppspeech::ReadFile2String(cmvn_file);
 picojson::value value;
 std::string err;

@@ -21,6 +21,7 @@
 #ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
 #define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
+#include <limits>
 #include <map>
 #include "frontend/feature-window.h"

@@ -7,6 +7,7 @@
 #include "frontend/feature-window.h"
 #include <cmath>
+#include <limits>
 #include <vector>
 #ifndef M_2PI

@@ -17,12 +17,12 @@
 */
 #include "frontend/rfft.h"
-#include "base/log.h"
 #include <cmath>
+#include <memory>
 #include <vector>
+#include "base/log.h"
 // see fftsg.c
 #ifdef __cplusplus
 extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);

@@ -25,40 +25,41 @@
 namespace kaldi {
 /// Empty constructor
-template<typename Real>
-Matrix<Real>::Matrix(): MatrixBase<Real>(NULL, 0, 0, 0) { }
+template <typename Real>
+Matrix<Real>::Matrix() : MatrixBase<Real>(NULL, 0, 0, 0) {}
 /*
 template<>
 template<>
-void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra, const VectorBase<float> &rb);
+void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float>
+&ra, const VectorBase<float> &rb);
 template<>
 template<>
-void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double> &ra, const VectorBase<double> &rb);
+void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double>
+&ra, const VectorBase<double> &rb);
 */
-template<typename Real>
-inline std::ostream & operator << (std::ostream & os, const MatrixBase<Real> & M) {
+template <typename Real>
+inline std::ostream& operator<<(std::ostream& os, const MatrixBase<Real>& M) {
 M.Write(os, false);
 return os;
 }
-template<typename Real>
-inline std::istream & operator >> (std::istream & is, Matrix<Real> & M) {
+template <typename Real>
+inline std::istream& operator>>(std::istream& is, Matrix<Real>& M) {
 M.Read(is, false);
 return is;
 }
-template<typename Real>
-inline std::istream & operator >> (std::istream & is, MatrixBase<Real> & M) {
+template <typename Real>
+inline std::istream& operator>>(std::istream& is, MatrixBase<Real>& M) {
 M.Read(is, false);
 return is;
 }
-}// namespace kaldi
+} // namespace kaldi
 #endif // KALDI_MATRIX_KALDI_MATRIX_INL_H_

File diff suppressed because it is too large.

@ -38,7 +38,7 @@ namespace kaldi {
/// Base class which provides matrix operations not involving resizing /// Base class which provides matrix operations not involving resizing
/// or allocation. Classes Matrix and SubMatrix inherit from it and take care /// or allocation. Classes Matrix and SubMatrix inherit from it and take care
/// of allocation and resizing. /// of allocation and resizing.
template<typename Real> template <typename Real>
class MatrixBase { class MatrixBase {
public: public:
// so this child can access protected members of other instances. // so this child can access protected members of other instances.
@ -62,22 +62,20 @@ class MatrixBase {
} }
/// Gives pointer to raw data (const). /// Gives pointer to raw data (const).
inline const Real* Data() const { inline const Real *Data() const { return data_; }
return data_;
}
/// Gives pointer to raw data (non-const). /// Gives pointer to raw data (non-const).
inline Real* Data() { return data_; } inline Real *Data() { return data_; }
/// Returns pointer to data for one row (non-const) /// Returns pointer to data for one row (non-const)
inline Real* RowData(MatrixIndexT i) { inline Real *RowData(MatrixIndexT i) {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) < KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(num_rows_)); static_cast<UnsignedMatrixIndexT>(num_rows_));
return data_ + i * stride_; return data_ + i * stride_;
} }
/// Returns pointer to data for one row (const) /// Returns pointer to data for one row (const)
inline const Real* RowData(MatrixIndexT i) const { inline const Real *RowData(MatrixIndexT i) const {
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) < KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(num_rows_)); static_cast<UnsignedMatrixIndexT>(num_rows_));
return data_ + i * stride_; return data_ + i * stride_;
@ -85,8 +83,9 @@ class MatrixBase {
/// Indexing operator, non-const /// Indexing operator, non-const
/// (only checks sizes if compiled with -DKALDI_PARANOID) /// (only checks sizes if compiled with -DKALDI_PARANOID)
inline Real& operator() (MatrixIndexT r, MatrixIndexT c) { inline Real &operator()(MatrixIndexT r, MatrixIndexT c) {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) < KALDI_PARANOID_ASSERT(
static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) && static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) < static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_cols_)); static_cast<UnsignedMatrixIndexT>(num_cols_));
@ -94,12 +93,13 @@ class MatrixBase {
} }
/// Indexing operator, provided for ease of debugging (gdb doesn't work /// Indexing operator, provided for ease of debugging (gdb doesn't work
/// with parenthesis operator). /// with parenthesis operator).
Real &Index (MatrixIndexT r, MatrixIndexT c) { return (*this)(r, c); } Real &Index(MatrixIndexT r, MatrixIndexT c) { return (*this)(r, c); }
/// Indexing operator, const /// Indexing operator, const
/// (only checks sizes if compiled with -DKALDI_PARANOID) /// (only checks sizes if compiled with -DKALDI_PARANOID)
inline const Real operator() (MatrixIndexT r, MatrixIndexT c) const { inline const Real operator()(MatrixIndexT r, MatrixIndexT c) const {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) < KALDI_PARANOID_ASSERT(
static_cast<UnsignedMatrixIndexT>(r) <
static_cast<UnsignedMatrixIndexT>(num_rows_) && static_cast<UnsignedMatrixIndexT>(num_rows_) &&
static_cast<UnsignedMatrixIndexT>(c) < static_cast<UnsignedMatrixIndexT>(c) <
static_cast<UnsignedMatrixIndexT>(num_cols_)); static_cast<UnsignedMatrixIndexT>(num_cols_));
@ -115,22 +115,22 @@ class MatrixBase {
/// Sets to zero, except ones along diagonal [for non-square matrices too] /// Sets to zero, except ones along diagonal [for non-square matrices too]
/// Copy given matrix. (no resize is done). /// Copy given matrix. (no resize is done).
template<typename OtherReal> template <typename OtherReal>
void CopyFromMat(const MatrixBase<OtherReal> & M, void CopyFromMat(const MatrixBase<OtherReal> &M,
MatrixTransposeType trans = kNoTrans); MatrixTransposeType trans = kNoTrans);
/// Copy from compressed matrix. /// Copy from compressed matrix.
//void CopyFromMat(const CompressedMatrix &M); // void CopyFromMat(const CompressedMatrix &M);
/// Copy given tpmatrix. (no resize is done). /// Copy given tpmatrix. (no resize is done).
//template<typename OtherReal> // template<typename OtherReal>
//void CopyFromTp(const TpMatrix<OtherReal> &M, // void CopyFromTp(const TpMatrix<OtherReal> &M,
//MatrixTransposeType trans = kNoTrans); // MatrixTransposeType trans = kNoTrans);
/// Copy from CUDA matrix. Implemented in ../cudamatrix/cu-matrix.h /// Copy from CUDA matrix. Implemented in ../cudamatrix/cu-matrix.h
//template<typename OtherReal> // template<typename OtherReal>
//void CopyFromMat(const CuMatrixBase<OtherReal> &M, // void CopyFromMat(const CuMatrixBase<OtherReal> &M,
//MatrixTransposeType trans = kNoTrans); // MatrixTransposeType trans = kNoTrans);
/// This function has two modes of operation. If v.Dim() == NumRows() * /// This function has two modes of operation. If v.Dim() == NumRows() *
/// NumCols(), then treats the vector as a row-by-row concatenation of a /// NumCols(), then treats the vector as a row-by-row concatenation of a
@ -138,10 +138,11 @@ class MatrixBase {
/// if v.Dim() == NumCols(), it sets each row of *this to a copy of v. /// if v.Dim() == NumCols(), it sets each row of *this to a copy of v.
void CopyRowsFromVec(const VectorBase<Real> &v); void CopyRowsFromVec(const VectorBase<Real> &v);
/// This version of CopyRowsFromVec is implemented in ../cudamatrix/cu-vector.cc /// This version of CopyRowsFromVec is implemented in
//void CopyRowsFromVec(const CuVectorBase<Real> &v); /// ../cudamatrix/cu-vector.cc
// void CopyRowsFromVec(const CuVectorBase<Real> &v);
template<typename OtherReal> template <typename OtherReal>
void CopyRowsFromVec(const VectorBase<OtherReal> &v); void CopyRowsFromVec(const VectorBase<OtherReal> &v);
/// Copies vector into matrix, column-by-column. /// Copies vector into matrix, column-by-column.
@ -177,8 +178,8 @@ class MatrixBase {
const MatrixIndexT num_rows, const MatrixIndexT num_rows,
const MatrixIndexT col_offset, const MatrixIndexT col_offset,
const MatrixIndexT num_cols) const { const MatrixIndexT num_cols) const {
return SubMatrix<Real>(*this, row_offset, num_rows, return SubMatrix<Real>(
col_offset, num_cols); *this, row_offset, num_rows, col_offset, num_cols);
} }
inline SubMatrix<Real> RowRange(const MatrixIndexT row_offset, inline SubMatrix<Real> RowRange(const MatrixIndexT row_offset,
const MatrixIndexT num_rows) const { const MatrixIndexT num_rows) const {
@ -189,7 +190,7 @@ class MatrixBase {
return SubMatrix<Real>(*this, 0, num_rows_, col_offset, num_cols); return SubMatrix<Real>(*this, 0, num_rows_, col_offset, num_cols);
} }
/* /*
/// Returns sum of all elements in matrix. /// Returns sum of all elements in matrix.
Real Sum() const; Real Sum() const;
/// Returns trace of matrix. /// Returns trace of matrix.
@ -223,7 +224,8 @@ class MatrixBase {
/// each row by a scalar taken from that dimension of the vector. /// each row by a scalar taken from that dimension of the vector.
void MulRowsVec(const VectorBase<Real> &scale); void MulRowsVec(const VectorBase<Real> &scale);
/// Divide each row into src.NumCols() equal groups, and then scale i'th row's /// Divide each row into src.NumCols() equal groups, and then scale i'th
row's
/// j'th group of elements by src(i, j). Requires src.NumRows() == /// j'th group of elements by src(i, j). Requires src.NumRows() ==
/// this->NumRows() and this->NumCols() % src.NumCols() == 0. /// this->NumRows() and this->NumCols() % src.NumCols() == 0.
void MulRowsGroupMat(const MatrixBase<Real> &src); void MulRowsGroupMat(const MatrixBase<Real> &src);
@ -242,77 +244,79 @@ class MatrixBase {
/// Does inversion in double precision even if matrix was not double. /// Does inversion in double precision even if matrix was not double.
void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL, void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL,
bool inverse_needed = true); bool inverse_needed = true);
*/ */
/// Inverts all the elements of the matrix /// Inverts all the elements of the matrix
void InvertElements(); void InvertElements();
/* /*
/// Transpose the matrix. This one is only /// Transpose the matrix. This one is only
/// applicable to square matrices (the one in the /// applicable to square matrices (the one in the
/// Matrix child class works also for non-square. /// Matrix child class works also for non-square.
void Transpose(); void Transpose();
*/ */
/// Copies column r from column indices[r] of src. /// Copies column r from column indices[r] of src.
/// As a special case, if indexes[i] == -1, sets column i to zero. /// As a special case, if indexes[i] == -1, sets column i to zero.
/// all elements of "indices" must be in [-1, src.NumCols()-1], /// all elements of "indices" must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this.NumRows() /// and src.NumRows() must equal this.NumRows()
void CopyCols(const MatrixBase<Real> &src, void CopyCols(const MatrixBase<Real> &src, const MatrixIndexT *indices);
const MatrixIndexT *indices);
/// Copies row r from row indices[r] of src (does nothing /// Copies row r from row indices[r] of src (does nothing
/// As a special case, if indexes[i] == -1, sets row i to zero. /// As a special case, if indexes[i] == -1, sets row i to zero.
/// all elements of "indices" must be in [-1, src.NumRows()-1], /// all elements of "indices" must be in [-1, src.NumRows()-1],
/// and src.NumCols() must equal this.NumCols() /// and src.NumCols() must equal this.NumCols()
void CopyRows(const MatrixBase<Real> &src, void CopyRows(const MatrixBase<Real> &src, const MatrixIndexT *indices);
const MatrixIndexT *indices);
/// Add column indices[r] of src to column r. /// Add column indices[r] of src to column r.
/// As a special case, if indexes[i] == -1, skip column i /// As a special case, if indexes[i] == -1, skip column i
/// indices.size() must equal this->NumCols(), /// indices.size() must equal this->NumCols(),
/// all elements of "reorder" must be in [-1, src.NumCols()-1], /// all elements of "reorder" must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this.NumRows() /// and src.NumRows() must equal this.NumRows()
//void AddCols(const MatrixBase<Real> &src, // void AddCols(const MatrixBase<Real> &src,
// const MatrixIndexT *indices); // const MatrixIndexT *indices);
/// Copies row r of this matrix from an array of floats at the location given /// Copies row r of this matrix from an array of floats at the location
/// given
/// by src[r]. If any src[r] is NULL then this.Row(r) will be set to zero. /// by src[r]. If any src[r] is NULL then this.Row(r) will be set to zero.
/// Note: we are using "pointer to const pointer to const object" for "src", /// Note: we are using "pointer to const pointer to const object" for "src",
/// because we may create "src" by calling Data() of const CuArray /// because we may create "src" by calling Data() of const CuArray
void CopyRows(const Real *const *src); void CopyRows(const Real *const *src);
/// Copies row r of this matrix to the array of floats at the location given /// Copies row r of this matrix to the array of floats at the location given
/// by dst[r]. If dst[r] is NULL, does not copy anywhere. Requires that none /// by dst[r]. If dst[r] is NULL, does not copy anywhere. Requires that
/// none
/// of the memory regions pointed to by the pointers in "dst" overlap (e.g. /// of the memory regions pointed to by the pointers in "dst" overlap (e.g.
/// none of the pointers should be the same). /// none of the pointers should be the same).
void CopyToRows(Real *const *dst) const; void CopyToRows(Real *const *dst) const;
/// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]). /// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]).
/// If indexes[r] < 0, does not add anything. all elements of "indexes" must /// If indexes[r] < 0, does not add anything. all elements of "indexes" must
/// be in [-1, src.NumRows()-1], and src.NumCols() must equal this.NumCols(). /// be in [-1, src.NumRows()-1], and src.NumCols() must equal
/// this.NumCols().
// void AddRows(Real alpha, // void AddRows(Real alpha,
// const MatrixBase<Real> &src, // const MatrixBase<Real> &src,
// const MatrixIndexT *indexes); // const MatrixIndexT *indexes);
/// Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as the /// Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as
/// the
/// beginning of a region of memory representing a vector of floats, of the /// beginning of a region of memory representing a vector of floats, of the
/// same length as this.NumCols(). If src[r] is NULL, does not add anything. /// same length as this.NumCols(). If src[r] is NULL, does not add anything.
//void AddRows(Real alpha, const Real *const *src); // void AddRows(Real alpha, const Real *const *src);
/// For each row r of this matrix, adds it (times alpha) to the array of /// For each row r of this matrix, adds it (times alpha) to the array of
/// floats at the location given by dst[r]. If dst[r] is NULL, does not do /// floats at the location given by dst[r]. If dst[r] is NULL, does not do
/// anything for that row. Requires that none of the memory regions pointed /// anything for that row. Requires that none of the memory regions pointed
/// to by the pointers in "dst" overlap (e.g. none of the pointers should be /// to by the pointers in "dst" overlap (e.g. none of the pointers should be
/// the same). /// the same).
//void AddToRows(Real alpha, Real *const *dst) const; // void AddToRows(Real alpha, Real *const *dst) const;
/// For each row i of *this, adds this->Row(i) to /// For each row i of *this, adds this->Row(i) to
/// dst->Row(indexes(i)) if indexes(i) >= 0, else do nothing. /// dst->Row(indexes(i)) if indexes(i) >= 0, else do nothing.
/// Requires that all the indexes[i] that are >= 0 /// Requires that all the indexes[i] that are >= 0
/// be distinct, otherwise the behavior is undefined. /// be distinct, otherwise the behavior is undefined.
//void AddToRows(Real alpha, // void AddToRows(Real alpha,
// const MatrixIndexT *indexes, // const MatrixIndexT *indexes,
// MatrixBase<Real> *dst) const; // MatrixBase<Real> *dst) const;
/* /*
inline void ApplyPow(Real power) { inline void ApplyPow(Real power) {
this -> Pow(*this, power); this -> Pow(*this, power);
} }
@ -349,66 +353,82 @@ class MatrixBase {
inline void ApplyLog() { inline void ApplyLog() {
this -> Log(*this); this -> Log(*this);
} }
*/ */
/// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D /// Eigenvalue Decomposition of a square NxN matrix into the form (*this) =
/// P^{-1}. Be careful: the relationship of D to the eigenvalues we output is /// P D
/// slightly complicated, due to the need for P to be real. In the symmetric /// P^{-1}. Be careful: the relationship of D to the eigenvalues we output
/// is
/// slightly complicated, due to the need for P to be real. In the
/// symmetric
/// case D is diagonal and real, but in /// case D is diagonal and real, but in
/// the non-symmetric case there may be complex-conjugate pairs of eigenvalues. /// the non-symmetric case there may be complex-conjugate pairs of
/// In this case, for the equation (*this) = P D P^{-1} to hold, D must actually /// eigenvalues.
/// be block diagonal, with 2x2 blocks corresponding to any such pairs. If a /// In this case, for the equation (*this) = P D P^{-1} to hold, D must
/// actually
/// be block diagonal, with 2x2 blocks corresponding to any such pairs. If
/// a
/// pair is lambda +- i*mu, D will have a corresponding 2x2 block /// pair is lambda +- i*mu, D will have a corresponding 2x2 block
/// [lambda, mu; -mu, lambda]. /// [lambda, mu; -mu, lambda].
/// Note that if the input matrix (*this) is non-invertible, P may not be invertible /// Note that if the input matrix (*this) is non-invertible, P may not be
/// so in this case instead of the equation (*this) = P D P^{-1} holding, we have /// invertible
/// so in this case instead of the equation (*this) = P D P^{-1} holding, we
/// have
/// instead (*this) P = P D. /// instead (*this) P = P D.
/// ///
/// The non-member function CreateEigenvalueMatrix creates D from eigs_real and eigs_imag. /// The non-member function CreateEigenvalueMatrix creates D from eigs_real
//void Eig(MatrixBase<Real> *P, /// and eigs_imag.
// void Eig(MatrixBase<Real> *P,
// VectorBase<Real> *eigs_real, // VectorBase<Real> *eigs_real,
// VectorBase<Real> *eigs_imag) const; // VectorBase<Real> *eigs_imag) const;
/// The Power method attempts to take the matrix to a power using a method that /// The Power method attempts to take the matrix to a power using a method
/// works in general for fractional and negative powers. The input matrix must /// that
/// works in general for fractional and negative powers. The input matrix
/// must
/// be invertible and have reasonable condition (or we don't guarantee the /// be invertible and have reasonable condition (or we don't guarantee the
/// results. The method is based on the eigenvalue decomposition. It will /// results. The method is based on the eigenvalue decomposition. It will
/// return false and leave the matrix unchanged, if at entry the matrix had /// return false and leave the matrix unchanged, if at entry the matrix had
/// real negative eigenvalues (or if it had zero eigenvalues and the power was /// real negative eigenvalues (or if it had zero eigenvalues and the power
/// was
/// negative). /// negative).
// bool Power(Real pow); // bool Power(Real pow);
/** Singular value decomposition /** Singular value decomposition
Major limitations: Major limitations:
For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we return For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we
return
the "skinny" Svd, i.e. the matrix in the middle is diagonal, and the the "skinny" Svd, i.e. the matrix in the middle is diagonal, and the
one on the left is rectangular. one on the left is rectangular.
In Svd, *this = U*diag(S)*Vt. In Svd, *this = U*diag(S)*Vt.
Null pointers for U and/or Vt at input mean we do not want that output. We Null pointers for U and/or Vt at input mean we do not want that output.
We
expect that S.Dim() == m, U is either NULL or m by n, expect that S.Dim() == m, U is either NULL or m by n,
and v is either NULL or n by n. and v is either NULL or n by n.
The singular values are not sorted (use SortSvd for that). */ The singular values are not sorted (use SortSvd for that). */
//void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U, // void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
// MatrixBase<Real> *Vt); // Destroys calling matrix. // MatrixBase<Real> *Vt); // Destroys calling matrix.
/// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is already /// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is
/// already
/// transposed; the normal formulation is U diag(s) V^T. /// transposed; the normal formulation is U diag(s) V^T.
/// Null pointers for U or V mean we don't want that output (this saves /// Null pointers for U or V mean we don't want that output (this saves
/// compute). The singular values are not sorted (use SortSvd for that). /// compute). The singular values are not sorted (use SortSvd for that).
//void Svd(VectorBase<Real> *s, MatrixBase<Real> *U, // void Svd(VectorBase<Real> *s, MatrixBase<Real> *U,
// MatrixBase<Real> *Vt) const; // MatrixBase<Real> *Vt) const;
/// Compute SVD but only retain the singular values. /// Compute SVD but only retain the singular values.
//void Svd(VectorBase<Real> *s) const { Svd(s, NULL, NULL); } // void Svd(VectorBase<Real> *s) const { Svd(s, NULL, NULL); }
/// Returns smallest singular value. /// Returns smallest singular value.
//Real MinSingularValue() const { // Real MinSingularValue() const {
// Vector<Real> tmp(std::min(NumRows(), NumCols())); // Vector<Real> tmp(std::min(NumRows(), NumCols()));
//Svd(&tmp); // Svd(&tmp);
//return tmp.Min(); // return tmp.Min();
//} //}
//void TestUninitialized() const; // This function is designed so that if any element // void TestUninitialized() const; // This function is designed so that if
// any element
// if the matrix is uninitialized memory, valgrind will complain. // if the matrix is uninitialized memory, valgrind will complain.
/// Returns condition number by computing Svd. Works even if cols > rows. /// Returns condition number by computing Svd. Works even if cols > rows.
@ -422,16 +442,19 @@ class MatrixBase {
/// Returns true if matrix is Diagonal. /// Returns true if matrix is Diagonal.
bool IsDiagonal(Real cutoff = 1.0e-05) const; // replace magic number bool IsDiagonal(Real cutoff = 1.0e-05) const; // replace magic number
/// Returns true if the matrix is all zeros, except for ones on diagonal. (it /// Returns true if the matrix is all zeros, except for ones on diagonal.
(it
/// does not have to be square). More specifically, this function returns /// does not have to be square). More specifically, this function returns
/// false if for any i, j, (*this)(i, j) differs by more than cutoff from the /// false if for any i, j, (*this)(i, j) differs by more than cutoff from
the
/// expression (i == j ? 1 : 0). /// expression (i == j ? 1 : 0).
bool IsUnit(Real cutoff = 1.0e-05) const; // replace magic number bool IsUnit(Real cutoff = 1.0e-05) const; // replace magic number
/// Returns true if matrix is all zeros. /// Returns true if matrix is all zeros.
bool IsZero(Real cutoff = 1.0e-05) const; // replace magic number bool IsZero(Real cutoff = 1.0e-05) const; // replace magic number
/// Frobenius norm, which is the sqrt of sum of square elements. Same as Schatten 2-norm, /// Frobenius norm, which is the sqrt of sum of square elements. Same as
Schatten 2-norm,
/// or just "2-norm". /// or just "2-norm".
Real FrobeniusNorm() const; Real FrobeniusNorm() const;
@ -461,7 +484,8 @@ class MatrixBase {
/// Sets each element to the Heaviside step function (x > 0 ? 1 : 0) of the /// Sets each element to the Heaviside step function (x > 0 ? 1 : 0) of the
/// corresponding element in "src". Note: in general you can make different /// corresponding element in "src". Note: in general you can make different
/// choices for x = 0, but for now please leave it as it (i.e. returning zero) /// choices for x = 0, but for now please leave it as it (i.e. returning
zero)
/// because it affects the RectifiedLinearComponent in the neural net code. /// because it affects the RectifiedLinearComponent in the neural net code.
void Heaviside(const MatrixBase<Real> &src); void Heaviside(const MatrixBase<Real> &src);
@ -477,7 +501,8 @@ class MatrixBase {
/// If the power is negative and the input to the power is zero, /// If the power is negative and the input to the power is zero,
/// The output will be set zero. If include_sign is true, it will /// The output will be set zero. If include_sign is true, it will
/// multiply the result by the sign of the input. /// multiply the result by the sign of the input.
void PowAbs(const MatrixBase<Real> &src, Real power, bool include_sign=false); void PowAbs(const MatrixBase<Real> &src, Real power, bool
include_sign=false);
void Floor(const MatrixBase<Real> &src, Real floor_val); void Floor(const MatrixBase<Real> &src, Real floor_val);
@ -492,36 +517,52 @@ class MatrixBase {
/// Floor(src, lower_limit); /// Floor(src, lower_limit);
/// Ceiling(src, upper_limit); /// Ceiling(src, upper_limit);
/// Exp(src) /// Exp(src)
void ExpLimited(const MatrixBase<Real> &src, Real lower_limit, Real upper_limit); void ExpLimited(const MatrixBase<Real> &src, Real lower_limit, Real
upper_limit);
/// Set each element to y = log(1 + exp(x)) /// Set each element to y = log(1 + exp(x))
void SoftHinge(const MatrixBase<Real> &src); void SoftHinge(const MatrixBase<Real> &src);
/// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 / p). /// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 /
/// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0. p).
/// Requires src.NumRows() == this->NumRows() and src.NumCols() %
this->NumCols() == 0.
void GroupPnorm(const MatrixBase<Real> &src, Real power); void GroupPnorm(const MatrixBase<Real> &src, Real power);
/// Calculate derivatives for the GroupPnorm function above... /// Calculate derivatives for the GroupPnorm function above...
/// if "input" is the input to the GroupPnorm function above (i.e. the "src" variable), /// if "input" is the input to the GroupPnorm function above (i.e. the "src"
/// and "output" is the result of the computation (i.e. the "this" of that function variable),
/// call), and *this has the same dimension as "input", then it sets each element /// and "output" is the result of the computation (i.e. the "this" of that
/// of *this to the derivative d(output-elem)/d(input-elem) for each element of "input", where function
/// "output-elem" is whichever element of output depends on that input element. /// call), and *this has the same dimension as "input", then it sets each
void GroupPnormDeriv(const MatrixBase<Real> &input, const MatrixBase<Real> &output, element
/// of *this to the derivative d(output-elem)/d(input-elem) for each element
of "input", where
/// "output-elem" is whichever element of output depends on that input
element.
void GroupPnormDeriv(const MatrixBase<Real> &input, const MatrixBase<Real>
&output,
Real power); Real power);
/// Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j /// Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j
/// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0. /// Requires src.NumRows() == this->NumRows() and src.NumCols() %
this->NumCols() == 0.
void GroupMax(const MatrixBase<Real> &src); void GroupMax(const MatrixBase<Real> &src);
/// Calculate derivatives for the GroupMax function above, where /// Calculate derivatives for the GroupMax function above, where
/// "input" is the input to the GroupMax function above (i.e. the "src" variable), /// "input" is the input to the GroupMax function above (i.e. the "src"
/// and "output" is the result of the computation (i.e. the "this" of that function variable),
/// and "output" is the result of the computation (i.e. the "this" of that
function
/// call), and *this must have the same dimension as "input". Each element /// call), and *this must have the same dimension as "input". Each element
/// of *this will be set to 1 if the corresponding input equals the output of /// of *this will be set to 1 if the corresponding input equals the output
/// the group, and 0 otherwise. The equals the function derivative where it is of
/// defined (it's not defined where multiple inputs in the group are equal to the output). /// the group, and 0 otherwise. The equals the function derivative where it
void GroupMaxDeriv(const MatrixBase<Real> &input, const MatrixBase<Real> &output); is
/// defined (it's not defined where multiple inputs in the group are equal
to the output).
void GroupMaxDeriv(const MatrixBase<Real> &input, const MatrixBase<Real>
&output);
/// Set each element to the tanh of the corresponding element of "src". /// Set each element to the tanh of the corresponding element of "src".
void Tanh(const MatrixBase<Real> &src); void Tanh(const MatrixBase<Real> &src);
@ -535,55 +576,56 @@ class MatrixBase {
// element-by-element, set *this = diff * (1.0 - value^2). // element-by-element, set *this = diff * (1.0 - value^2).
void DiffTanh(const MatrixBase<Real> &value, void DiffTanh(const MatrixBase<Real> &value,
const MatrixBase<Real> &diff); const MatrixBase<Real> &diff);
*/ */
/** Uses Svd to compute the eigenvalue decomposition of a symmetric positive /** Uses Svd to compute the eigenvalue decomposition of a symmetric positive
* semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an * semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an
* orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not * orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not
* positive semi-definite (check_thresh controls how stringent the check is; * positive semi-definite (check_thresh controls how stringent the check is;
* set it to 2 to ensure it won't ever complain, but it will zero out negative * set it to 2 to ensure it won't ever complain, but it will zero out
* negative
* dimensions in your matrix. * dimensions in your matrix.
* *
* Caution: if you want the eigenvalues, it may make more sense to convert to * Caution: if you want the eigenvalues, it may make more sense to convert
* SpMatrix and use Eig() function there, which uses eigenvalue decomposition * to
* SpMatrix and use Eig() function there, which uses eigenvalue
* decomposition
* directly rather than SVD. * directly rather than SVD.
*/ */
/// stream read. /// stream read.
/// Use instead of stream<<*this, if you want to add to existing contents. /// Use instead of stream<<*this, if you want to add to existing contents.
// Will throw exception on failure. // Will throw exception on failure.
void Read(std::istream & in, bool binary); void Read(std::istream &in, bool binary);
/// write to stream. /// write to stream.
void Write(std::ostream & out, bool binary) const; void Write(std::ostream &out, bool binary) const;
// Below is internal methods for Svd, user does not have to know about this. // Below is internal methods for Svd, user does not have to know about this.
protected: protected:
/// Initializer, callable only from child. /// Initializer, callable only from child.
explicit MatrixBase(Real *data, MatrixIndexT cols, MatrixIndexT rows, MatrixIndexT stride) : explicit MatrixBase(Real *data,
data_(data), num_cols_(cols), num_rows_(rows), stride_(stride) { MatrixIndexT cols,
MatrixIndexT rows,
MatrixIndexT stride)
: data_(data), num_cols_(cols), num_rows_(rows), stride_(stride) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real); KALDI_ASSERT_IS_FLOATING_TYPE(Real);
} }
/// Initializer, callable only from child. /// Initializer, callable only from child.
/// Empty initializer, for un-initialized matrix. /// Empty initializer, for un-initialized matrix.
explicit MatrixBase(): data_(NULL) { explicit MatrixBase() : data_(NULL) { KALDI_ASSERT_IS_FLOATING_TYPE(Real); }
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
// Make sure pointers to MatrixBase cannot be deleted. // Make sure pointers to MatrixBase cannot be deleted.
~MatrixBase() { } ~MatrixBase() {}
/// A workaround that allows SubMatrix to get a pointer to non-const data /// A workaround that allows SubMatrix to get a pointer to non-const data
/// for const Matrix. Unfortunately C++ does not allow us to declare a /// for const Matrix. Unfortunately C++ does not allow us to declare a
/// "public const" inheritance or anything like that, so it would require /// "public const" inheritance or anything like that, so it would require
/// a lot of work to make the SubMatrix class totally const-correct-- /// a lot of work to make the SubMatrix class totally const-correct--
/// we would have to override many of the Matrix functions. /// we would have to override many of the Matrix functions.
inline Real* Data_workaround() const { inline Real *Data_workaround() const { return data_; }
return data_;
}
/// data memory area /// data memory area
Real* data_; Real *data_;
/// these attributes store the real matrix size as it is stored in memory /// these attributes store the real matrix size as it is stored in memory
/// including memalignment /// including memalignment
@ -592,63 +634,66 @@ class MatrixBase {
/** True number of columns for the internal matrix. This number may differ /** True number of columns for the internal matrix. This number may differ
* from num_cols_ as memory alignment might be used. */ * from num_cols_ as memory alignment might be used. */
MatrixIndexT stride_; MatrixIndexT stride_;
private: private:
KALDI_DISALLOW_COPY_AND_ASSIGN(MatrixBase); KALDI_DISALLOW_COPY_AND_ASSIGN(MatrixBase);
}; };
/// A class for storing matrices. /// A class for storing matrices.
template<typename Real> template <typename Real>
class Matrix : public MatrixBase<Real> { class Matrix : public MatrixBase<Real> {
public: public:
/// Empty constructor. /// Empty constructor.
Matrix(); Matrix();
/// Basic constructor. /// Basic constructor.
Matrix(const MatrixIndexT r, const MatrixIndexT c, Matrix(const MatrixIndexT r,
const MatrixIndexT c,
MatrixResizeType resize_type = kSetZero, MatrixResizeType resize_type = kSetZero,
MatrixStrideType stride_type = kDefaultStride): MatrixStrideType stride_type = kDefaultStride)
MatrixBase<Real>() { Resize(r, c, resize_type, stride_type); } : MatrixBase<Real>() {
Resize(r, c, resize_type, stride_type);
}
/// Swaps the contents of *this and *other. Shallow swap. /// Swaps the contents of *this and *other. Shallow swap.
void Swap(Matrix<Real> *other); void Swap(Matrix<Real> *other);
/// Constructor from any MatrixBase. Can also copy with transpose. /// Constructor from any MatrixBase. Can also copy with transpose.
/// Allocates new memory. /// Allocates new memory.
explicit Matrix(const MatrixBase<Real> & M, explicit Matrix(const MatrixBase<Real> &M,
MatrixTransposeType trans = kNoTrans); MatrixTransposeType trans = kNoTrans);
/// Same as above, but need to avoid default copy constructor. /// Same as above, but need to avoid default copy constructor.
Matrix(const Matrix<Real> & M); // (cannot make explicit) Matrix(const Matrix<Real> &M); // (cannot make explicit)
/// Copy constructor: as above, but from another type. /// Copy constructor: as above, but from another type.
template<typename OtherReal> template <typename OtherReal>
explicit Matrix(const MatrixBase<OtherReal> & M, explicit Matrix(const MatrixBase<OtherReal> &M,
MatrixTransposeType trans = kNoTrans); MatrixTransposeType trans = kNoTrans);
/// Copy constructor taking TpMatrix... /// Copy constructor taking TpMatrix...
//template <typename OtherReal> // template <typename OtherReal>
//explicit Matrix(const TpMatrix<OtherReal> & M, // explicit Matrix(const TpMatrix<OtherReal> & M,
//MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() { // MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
//if (trans == kNoTrans) { // if (trans == kNoTrans) {
//Resize(M.NumRows(), M.NumCols(), kUndefined); // Resize(M.NumRows(), M.NumCols(), kUndefined);
//this->CopyFromTp(M); // this->CopyFromTp(M);
//} else { //} else {
//Resize(M.NumCols(), M.NumRows(), kUndefined); // Resize(M.NumCols(), M.NumRows(), kUndefined);
//this->CopyFromTp(M, kTrans); // this->CopyFromTp(M, kTrans);
//} //}
//} //}
/// read from stream. /// read from stream.
// Unlike one in base, allows resizing. // Unlike one in base, allows resizing.
void Read(std::istream & in, bool binary); void Read(std::istream &in, bool binary);
/// Remove a specified row. /// Remove a specified row.
void RemoveRow(MatrixIndexT i); void RemoveRow(MatrixIndexT i);
/// Transpose the matrix. Works for non-square /// Transpose the matrix. Works for non-square
/// matrices as well as square ones. /// matrices as well as square ones.
//void Transpose(); // void Transpose();
/// Distructor to free matrices. /// Distructor to free matrices.
~Matrix() { Destroy(); } ~Matrix() { Destroy(); }
@ -671,7 +716,7 @@ class Matrix : public MatrixBase<Real> {
MatrixStrideType stride_type = kDefaultStride); MatrixStrideType stride_type = kDefaultStride);
/// Assignment operator that takes MatrixBase. /// Assignment operator that takes MatrixBase.
Matrix<Real> &operator=(const MatrixBase<Real> &other) {
if (MatrixBase<Real>::NumRows() != other.NumRows() ||
MatrixBase<Real>::NumCols() != other.NumCols())
Resize(other.NumRows(), other.NumCols(), kUndefined);
@ -680,7 +725,7 @@ class Matrix : public MatrixBase<Real> {
}
/// Assignment operator. Needed for inclusion in std::vector.
Matrix<Real> &operator=(const Matrix<Real> &other) {
if (MatrixBase<Real>::NumRows() != other.NumRows() ||
MatrixBase<Real>::NumCols() != other.NumCols())
Resize(other.NumRows(), other.NumCols(), kUndefined);
@ -694,13 +739,14 @@ class Matrix : public MatrixBase<Real> {
void Destroy();
/// Init assumes the current class contents are invalid (i.e. junk or have
/// already been freed), and it sets the matrix to newly allocated memory with
/// the specified number of rows and columns. r == c == 0 is acceptable. The data
/// memory contents will be undefined.
void Init(const MatrixIndexT r,
const MatrixIndexT c,
const MatrixStrideType stride_type);
};
/// @} end "addtogroup matrix_group"
@ -710,7 +756,7 @@ class Matrix : public MatrixBase<Real> {
/// A structure containing the HTK header.
/// [TODO: change the style of the variables to Kaldi-compliant]
template <typename Real>
class SubMatrix : public MatrixBase<Real> {
public:
// Initialize a SubMatrix from part of a matrix; this is
@ -718,7 +764,7 @@ class SubMatrix : public MatrixBase<Real> {
// This initializer is against the proper semantics of "const", since
// SubMatrix can change its contents. It would be hard to implement
// a "const-safe" version of this class.
SubMatrix(const MatrixBase<Real> &T,
const MatrixIndexT ro, // row offset, 0 < ro < NumRows()
const MatrixIndexT r, // number of rows, r > 0
const MatrixIndexT co, // column offset, 0 < co < NumCols()
@ -735,13 +781,13 @@ class SubMatrix : public MatrixBase<Real> {
/// This type of constructor is needed for Range() to work [in Matrix base
/// class]. Cannot make it explicit.
SubMatrix<Real>(const SubMatrix &other)
: MatrixBase<Real>(
other.data_, other.num_cols_, other.num_rows_, other.stride_) {}
private:
/// Disallow assignment.
SubMatrix<Real> &operator=(const SubMatrix<Real> &other);
};
/// @} End of "addtogroup matrix_funcs_io".
@ -794,25 +840,33 @@ Real TraceMatMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
/// the same as U->NumCols(), and we sort s from greatest to least absolute
/// value (if sort_on_absolute_value == true) or greatest to least value
/// otherwise, moving the columns of U, if it exists, and the rows of Vt, if it
/// exists, around in the same way. Note: the "absolute value" part won't matter
/// if this is an actual SVD, since singular values are non-negative.
template<typename Real> void SortSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real>* Vt = NULL,
bool sort_on_absolute_value = true);
/// Creates the eigenvalue matrix D that is part of the decomposition used Matrix::Eig.
/// D will be block-diagonal with blocks of size 1 (for real eigenvalues) or 2x2
/// for complex pairs. If a complex pair is lambda +- i*mu, D will have a corresponding
/// 2x2 block [lambda, mu; -mu, lambda].
/// This function will throw if any complex eigenvalues are not in complex conjugate
/// pairs (or the members of such pairs are not consecutively numbered).
template<typename Real>
void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real> &imag,
MatrixBase<Real> *D);
/// The following function is used in Matrix::Power, and separately tested, so we
/// declare it here mainly for the testing code to see. It takes a complex value to
/// a power using a method that will work for noninteger powers (but will fail if the
/// complex value is real and negative).
template<typename Real>
bool AttemptComplexPower(Real *x_re, Real *x_im, Real power);
@ -823,17 +877,17 @@ bool AttemptComplexPower(Real *x_re, Real *x_im, Real power);
/// \addtogroup matrix_funcs_io
/// @{
template <typename Real>
std::ostream &operator<<(std::ostream &Out, const MatrixBase<Real> &M);
template <typename Real>
std::istream &operator>>(std::istream &In, MatrixBase<Real> &M);
// The Matrix read allows resizing, so we override the MatrixBase one.
template <typename Real>
std::istream &operator>>(std::istream &In, Matrix<Real> &M);
template <typename Real>
bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) {
return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols());
}
@ -844,7 +898,6 @@ bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) {
} // namespace kaldi
// we need to include the implementation and some
// template specializations.
#include "matrix/kaldi-matrix-inl.h"
@ -26,32 +26,33 @@
namespace kaldi {
template <typename Real>
std::ostream &operator<<(std::ostream &os, const VectorBase<Real> &rv) {
rv.Write(os, false);
return os;
}
template <typename Real>
std::istream &operator>>(std::istream &is, VectorBase<Real> &rv) {
rv.Read(is, false);
return is;
}
template <typename Real>
std::istream &operator>>(std::istream &is, Vector<Real> &rv) {
rv.Read(is, false);
return is;
}
// template<>
// template<>
// void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv);
// template<>
// template<>
// void VectorBase<double>::AddVec<double>(const double alpha,
// const VectorBase<double> &rv);
} // namespace kaldi
(File diff suppressed because it is too large.)
@ -37,7 +37,7 @@ namespace kaldi {
/// Provides a vector abstraction class.
/// This class provides a way to work with vectors in kaldi.
/// It encapsulates basic operations and memory optimizations.
template <typename Real>
class VectorBase {
public:
/// Set vector to all zeros.
@ -53,23 +53,23 @@ class VectorBase {
inline MatrixIndexT Dim() const { return dim_; }
/// Returns the size in memory of the vector, in bytes.
inline MatrixIndexT SizeInBytes() const { return (dim_ * sizeof(Real)); }
/// Returns a pointer to the start of the vector's data.
inline Real *Data() { return data_; }
/// Returns a pointer to the start of the vector's data (const).
inline const Real *Data() const { return data_; }
/// Indexing operator (const).
inline Real operator()(MatrixIndexT i) const {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(dim_));
return *(data_ + i);
}
/// Indexing operator (non-const).
inline Real &operator()(MatrixIndexT i) {
KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
static_cast<UnsignedMatrixIndexT>(dim_));
return *(data_ + i);
@ -98,12 +98,12 @@ class VectorBase {
void CopyFromVec(const VectorBase<Real> &v);
/// Copy data from another vector of different type (double vs. float)
template <typename OtherReal>
void CopyFromVec(const VectorBase<OtherReal> &v);
/// Performs a row stack of the matrix M
void CopyRowsFromMat(const MatrixBase<Real> &M);
template <typename OtherReal>
void CopyRowsFromMat(const MatrixBase<OtherReal> &M);
/// Performs a column stack of the matrix M
@ -113,12 +113,12 @@ class VectorBase {
/// this->Copy(M[row]).
void CopyRowFromMat(const MatrixBase<Real> &M, MatrixIndexT row);
/// Extracts a row of the matrix M with type conversion.
template <typename OtherReal>
void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row);
/// Extracts a column of the matrix M.
template <typename OtherReal>
void CopyColFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT col);
/// Reads from C++ stream (option to add to existing contents).
/// Throws exception on failure
@ -129,19 +129,21 @@ class VectorBase {
friend class VectorBase<double>;
friend class VectorBase<float>;
protected:
/// Destructor; does not deallocate memory, this is handled by child classes.
/// This destructor is protected so this object can only be
/// deleted via a child.
~VectorBase() {}
/// Empty initializer, corresponds to vector of zero size.
explicit VectorBase() : data_(NULL), dim_(0) {
KALDI_ASSERT_IS_FLOATING_TYPE(Real);
}
/// data memory area
Real *data_;
/// dimension of vector
MatrixIndexT dim_;
KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase);
@ -151,25 +153,28 @@ class VectorBase {
*
* This class provides a way to work with vectors in kaldi.
* It encapsulates basic operations and memory optimizations. */
template <typename Real>
class Vector : public VectorBase<Real> {
public:
/// Constructor that takes no arguments. Initializes to empty.
Vector() : VectorBase<Real>() {}
/// Constructor with specific size. Sets to all-zero by default
/// if set_zero == false, memory contents are undefined.
explicit Vector(const MatrixIndexT s,
MatrixResizeType resize_type = kSetZero)
: VectorBase<Real>() {
Resize(s, resize_type);
}
/// Copy constructor from CUDA vector
/// This is defined in ../cudamatrix/cu-vector.h
// template<typename OtherReal>
// explicit Vector(const CuVectorBase<OtherReal> &cu);
/// Copy constructor. The need for this is controversial.
Vector(const Vector<Real> &v)
: VectorBase<Real>() { // (cannot be explicit)
Resize(v.Dim(), kUndefined);
this->CopyFromVec(v);
}
@ -181,19 +186,19 @@ class Vector: public VectorBase<Real> {
}
/// Type conversion constructor.
template <typename OtherReal>
explicit Vector(const VectorBase<OtherReal> &v) : VectorBase<Real>() {
Resize(v.Dim(), kUndefined);
this->CopyFromVec(v);
}
// Took this out since it is unsafe : Arnab
// /// Constructor from a pointer and a size; copies the data to a location
// /// it owns.
// Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() {
// Resize(s);
// CopyFromPtr(Data, s);
// }
/// Swaps the contents of *this and *other. Shallow swap.
@ -219,59 +224,63 @@ class Vector: public VectorBase<Real> {
void RemoveElement(MatrixIndexT i);
/// Assignment operator.
Vector<Real> &operator=(const Vector<Real> &other) {
Resize(other.Dim(), kUndefined);
this->CopyFromVec(other);
return *this;
}
/// Assignment operator that takes VectorBase.
Vector<Real> &operator=(const VectorBase<Real> &other) {
Resize(other.Dim(), kUndefined);
this->CopyFromVec(other);
return *this;
}
private:
/// Init assumes the current contents of the class are invalid (i.e. junk or
/// has already been freed), and it sets the vector to newly allocated memory
/// with the specified dimension. dim == 0 is acceptable. The memory contents
/// pointed to by data_ will be undefined.
void Init(const MatrixIndexT dim);
/// Destroy function, called internally.
void Destroy();
};
/// Represents a non-allocating general vector which can be defined
/// as a sub-vector of higher-level vector [or as the row of a matrix].
template <typename Real>
class SubVector : public VectorBase<Real> {
public:
/// Constructor from a Vector or SubVector.
/// SubVectors are not const-safe and it's very hard to make them
/// so for now we just give up. This function contains const_cast.
SubVector(const VectorBase<Real> &t,
const MatrixIndexT origin,
const MatrixIndexT length)
: VectorBase<Real>() {
// following assert equiv to origin>=0 && length>=0 &&
// origin+length <= rt.dim_
KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin) +
static_cast<UnsignedMatrixIndexT>(length) <=
static_cast<UnsignedMatrixIndexT>(t.Dim()));
VectorBase<Real>::data_ = const_cast<Real *>(t.Data() + origin);
VectorBase<Real>::dim_ = length;
}
/// This constructor initializes the vector to point at the contents
/// of this packed matrix (SpMatrix or TpMatrix).
// SubVector(const PackedMatrix<Real> &M) {
// VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
// VectorBase<Real>::dim_ = (M.NumRows()*(M.NumRows()+1))/2;
//}
/// Copy constructor
SubVector(const SubVector &other) : VectorBase<Real>() {
// this copy constructor needed for Range() to work in base class.
VectorBase<Real>::data_ = other.data_;
VectorBase<Real>::dim_ = other.dim_;
@ -280,14 +289,14 @@ class SubVector : public VectorBase<Real> {
/// Constructor from a pointer to memory and a length. Keeps a pointer
/// to the data but does not take ownership (will never delete).
/// Caution: this constructor enables you to evade const constraints.
SubVector(const Real *data, MatrixIndexT length) : VectorBase<Real>() {
VectorBase<Real>::data_ = const_cast<Real *>(data);
VectorBase<Real>::dim_ = length;
}
/// This operation does not preserve const-ness, so be careful.
SubVector(const MatrixBase<Real> &matrix, MatrixIndexT row) {
VectorBase<Real>::data_ = const_cast<Real *>(matrix.RowData(row));
VectorBase<Real>::dim_ = matrix.NumCols();
}
@ -295,7 +304,7 @@ class SubVector : public VectorBase<Real> {
private:
/// Disallow assignment operator.
SubVector &operator=(const SubVector &other) {}
};
/// @} end of "addtogroup matrix_group"
@ -303,43 +312,41 @@ class SubVector : public VectorBase<Real> {
/// @{
/// Output to a C++ stream. Non-binary by default (use Write for
/// binary output).
template <typename Real>
std::ostream &operator<<(std::ostream &out, const VectorBase<Real> &v);
/// Input from a C++ stream. Will automatically read text or
/// binary data from the stream.
template <typename Real>
std::istream &operator>>(std::istream &in, VectorBase<Real> &v);
/// Input from a C++ stream. Will automatically read text or
/// binary data from the stream.
template <typename Real>
std::istream &operator>>(std::istream &in, Vector<Real> &v);
/// @} end of \addtogroup matrix_funcs_io
/// \addtogroup matrix_funcs_scalar
/// @{
// template<typename Real>
// bool ApproxEqual(const VectorBase<Real> &a,
// const VectorBase<Real> &b, Real tol = 0.01) {
// return a.ApproxEqual(b, tol);
//}
// template<typename Real>
// inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
// float tol = 0.01) {
// KALDI_ASSERT(a.ApproxEqual(b, tol));
//}
} // namespace kaldi
// we need to include the implementation
#include "matrix/kaldi-vector-inl.h"
#endif // KALDI_MATRIX_KALDI_VECTOR_H_
@ -27,18 +27,15 @@
namespace kaldi {
// this enums equal to CblasTrans and CblasNoTrans constants from CBLAS library
// we are writing them as literals because we don't want to include here matrix/kaldi-blas.h,
// which puts many symbols into global scope (like "real") via the header f2c.h
typedef enum {
kTrans = 112, // = CblasTrans
kNoTrans = 111 // = CblasNoTrans
} MatrixTransposeType;
typedef enum { kSetZero, kUndefined, kCopyData } MatrixResizeType;
typedef enum {
@ -53,24 +50,33 @@ typedef enum {
kTakeMeanAndCheck
} SpCopyType;
template <typename Real>
class VectorBase;
template <typename Real>
class Vector;
template <typename Real>
class SubVector;
template <typename Real>
class MatrixBase;
template <typename Real>
class SubMatrix;
template <typename Real>
class Matrix;
/// This class provides a way for switching between double and float types.
template <typename T>
class OtherReal {}; // useful in reading+writing routines
// to switch double and float.
/// A specialized class for switching from float to double.
template <>
class OtherReal<float> {
public:
typedef double Real;
};
/// A specialized class for switching from double to float.
template <>
class OtherReal<double> {
public:
typedef float Real;
};
@ -81,12 +87,10 @@ typedef int32 SignedMatrixIndexT;
typedef uint32 UnsignedMatrixIndexT;
// If you want to use size_t for the index type, do as follows instead:
// typedef size_t MatrixIndexT;
// typedef ssize_t SignedMatrixIndexT;
// typedef size_t UnsignedMatrixIndexT;
}
#endif // KALDI_MATRIX_MATRIX_COMMON_H_
@ -1,14 +1,15 @@
project(kaldi)
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
)
add_subdirectory(base)
add_subdirectory(util)
if(WITH_ASR)
add_subdirectory(lat)
add_subdirectory(fstext)
add_subdirectory(decoder)
add_subdirectory(lm)
add_subdirectory(fstbin)
add_subdirectory(lmbin)
endif()
@ -44,7 +44,19 @@ typedef float BaseFloat;
#ifndef COMPILE_WITHOUT_OPENFST
#ifdef WITH_ASR
#include <fst/types.h>
#else
using int8 = int8_t;
using int16 = int16_t;
using int32 = int32_t;
using int64 = int64_t;
using uint8 = uint8_t;
using uint16 = uint16_t;
using uint32 = uint32_t;
using uint64 = uint64_t;
#endif
namespace kaldi {
using ::int16;
@ -0,0 +1,18 @@
# set(CMAKE_CXX_STANDARD 11)
# # fastdeploy
# set(FASTDEPLOY_INSTALL_DIR "fdlib/fastdeploy-linux-x64-1.0.4" CACHE STRING force)
# if(NOT EXISTS ${FASTDEPLOY_INSTALL_DIR})
# message(FATAL_ERROR "Please using cmake -B build -DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR}")
# endif()
# include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# # FastDeploy
# include_directories(${FASTDEPLOY_INCS})
add_executable(infer_onnx_silero_vad ${CMAKE_CURRENT_SOURCE_DIR}/infer_onnx_silero_vad.cc wav.h vad.cc vad.h)
# FastDeploy
target_link_libraries(infer_onnx_silero_vad ${FASTDEPLOY_LIBS})
@ -0,0 +1,121 @@
English | [简体中文](README_CN.md)
# Silero VAD Deployment Example
This directory provides an example in which `infer_onnx_silero_vad` quickly deploys the Silero VAD model on CPU/GPU.
Before deployment, confirm the following two steps.
- 1. The software and hardware must meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
- 2. Download the precompiled deployment library and sample code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
Taking VAD inference on Linux as an example, the build and test can be completed by executing the following commands in this directory.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose the appropriate version from the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the VAD model file and test audio. After decompression, place the model and test audio in the same directory as infer_onnx_silero_vad.cc
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
# inference
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```
- The above commands work on Linux and macOS. For Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md)
## VAD C++ Interface
### Vad Class
```c++
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```
**Parameter**
> * **model_file**(str): Model file path
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default (i.e. the default configuration is used)
### setAudioCofig function
**Must be called before the `init` function**
```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```
**Parameter**
> * **sr**(int): sampling rate
> * **frame_ms**(int): Length of each detection frame in milliseconds; used to compute the detection window size
> * **threshold**(float): Probability threshold above which a frame is treated as speech
> * **min_silence_duration_ms**(int): Minimum silence duration used to decide that a speech segment has ended
> * **speech_pad_ms**(int): Padding used when computing the end time of a speech segment
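As a minimal usage sketch of the call order described above (the file name and values are illustrative assumptions; note that the bundled `infer_onnx_silero_vad.cc` example uses the `SetConfig`/`Init` spellings instead, so adapt the names to the header you actually build against):
```c++
#include "vad.h"

int main() {
    Vad vad("silero_vad.onnx");
    // Configuration must precede init(): 16 kHz audio, 32 ms frames,
    // 0.5 speech-probability threshold, 200 ms minimum silence, 0 ms padding.
    vad.setAudioCofig(16000, 32, 0.5f, 200, 0);
    vad.init();
    return 0;
}
```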
### init function
Used to initialize audio-related parameters.
```c++
void Vad::init();
```
### loadAudio function
Load audio.
```c++
void Vad::loadAudio(const std::string& wavPath)
```
**Parameter**
> * **wavPath**(str): Audio file path
### Predict function
Runs model inference.
```c++
bool Vad::Predict();
```
### getResult function
**Used to obtain the inference results**
```c++
std::vector<std::map<std::string, float>> Vad::getResult(
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
float mergeThreshold = 0.3);
```
**Parameter**
> * **removeThreshold**(float): Threshold for discarding result segments; segments shorter than this are dropped
> * **expandHeadThreshold**(float): Offset applied to the start of each segment; the detected start time may sit too close to the speech, so it is moved earlier by this amount
> * **expandTailThreshold**(float): Offset applied to the end of each segment; the detected end time may sit too close to the speech, so it is moved later by this amount
> * **mergeThreshold**(float): Segments closer to each other than this are merged into one
**The output format is** `std::vector<std::map<std::string, float>>`
> The output is a list; each element is one speech segment.
>
> Each segment exposes 'start' for its start time and 'end' for its end time (in seconds).
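A hedged sketch of consuming the result (it assumes a `vad` object has already been configured, initialized, loaded with audio, and that `Predict` has returned; the threshold values are simply the documented defaults):
```c++
// Retrieve the detected segments and print their boundaries in seconds.
std::vector<std::map<std::string, float>> segments =
    vad.getResult(1.6, 0.32, 0, 0.3);
for (const auto& seg : segments) {
    std::cout << "speech from " << seg.at("start") << " s to "
              << seg.at("end") << " s" << std::endl;
}
```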
### Tips
1. The `setAudioCofig` function must be called before the `init` function
2. The sampling rate of the input audio file must be consistent with that set in the code
- [Model Description](../)
- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md)
@ -0,0 +1,119 @@
[English](README.md) | Simplified Chinese
# Silero VAD Deployment Example
This directory provides `infer_onnx_silero_vad`, which quickly deploys the Silero VAD model on CPU/GPU.
Before deployment, confirm the following two steps.
- 1. The software and hardware environment meets the requirements; see [FastDeploy Environment Requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the precompiled deployment library and sample code according to your development environment; see [FastDeploy Precompiled Library](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
Taking VAD inference on Linux as an example, run the following commands in this directory to complete the build and test.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Choose the appropriate version from the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the VAD model file and test audio. After extraction, place the model and test audio in the same directory as infer_onnx_silero_vad.cc
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
# Inference
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```
The above commands only work on Linux or macOS. For how to use the SDK on Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../docs/cn/faq/use_sdk_on_windows.md)
## VAD C++ Interface
### Vad Class
```c++
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```
**Parameter**
> * **model_file**(str): Model file path
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default (i.e. the default configuration is used)
### setAudioCofig function
**Must be called before the `init` function**
```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```
**Parameter**
> * **sr**(int): sampling rate
> * **frame_ms**(int): Length of each detection frame; used to compute the detection window size
> * **threshold**(float): Probability threshold for the detection result
> * **min_silence_duration_ms**(int): Threshold used to decide whether a span counts as silence
> * **speech_pad_ms**(int): Used to compute the end time of the speech
### init function
Initializes the audio-related parameters.
```c++
void Vad::init();
```
### loadAudio function
Loads the audio.
```c++
void Vad::loadAudio(const std::string& wavPath)
```
**Parameter**
> * **wavPath**(str): Audio file path
### Predict function
Runs model inference.
```c++
bool Vad::Predict();
```
### getResult function
**Used to obtain the inference results**
```c++
std::vector<std::map<std::string, float>> Vad::getResult(
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
float mergeThreshold = 0.3);
```
**Parameter**
> * **removeThreshold**(float): Threshold for discarding result segments; segments that are too short are dropped according to this threshold
> * **expandHeadThreshold**(float): Offset for the start of each segment; the detected start time may sit too close to the speech, so it is moved earlier by this amount
> * **expandTailThreshold**(float): Offset for the end of each segment; the detected end time may sit too close to the speech, so it is moved later by this amount
> * **mergeThreshold**(float): Some result segments are very close together and can be merged into one; voiced segments are merged according to this threshold
**The output format is** `std::vector<std::map<std::string, float>>`
> The output is a list; each element is one speech segment.
>
> Each segment exposes 'start' for its start time and 'end' for its end time.
### Tips
1. The `setAudioCofig` function must be called before the `init` function
2. The sampling rate of the input audio file must match the one set in the code
- [Model Description](../)
- [How to switch the model inference backend engine](../../../../docs/cn/faq/how_to_change_backend.md)
@ -0,0 +1,65 @@
#include "vad.h"
int main(int argc, char* argv[]) {
if (argc < 3) {
std::cout << "Usage: infer_onnx_silero_vad path/to/model path/to/audio "
"run_option, "
"e.g ./infer_onnx_silero_vad silero_vad.onnx sample.wav"
<< std::endl;
return -1;
}
std::string model_file = argv[1];
std::string audio_file = argv[2];
int sr = 16000;
Vad vad(model_file);
// custom config, but must be set before init
vad.SetConfig(sr, 32, 0.45f, 200, 0, 0);
vad.Init();
std::vector<float> inputWav; // [0, 1]
wav::WavReader wav_reader = wav::WavReader(audio_file);
assert(wav_reader.sample_rate() == sr);
auto num_samples = wav_reader.num_samples();
inputWav.resize(num_samples);
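// Scale 16-bit PCM samples to floats in [-1, 1) before feeding them to the model.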
for (int i = 0; i < num_samples; i++) {
inputWav[i] = wav_reader.data()[i] / 32768;
}
int window_size_samples = vad.WindowSizeSamples();
for (int64_t j = 0; j < num_samples; j += window_size_samples) {
auto start = j;
auto end = start + window_size_samples >= num_samples
? num_samples
: start + window_size_samples;
auto current_chunk_size = end - start;
std::vector<float> r{&inputWav[0] + start, &inputWav[0] + end};
assert(r.size() == current_chunk_size);
if (!vad.ForwardChunk(r)) {
std::cerr << "Failed to inference while using model:"
<< vad.ModelName() << "." << std::endl;
return 1;
}
Vad::State s = vad.Postprocess();
std::cout << s << " ";
}
std::cout << std::endl;
std::vector<std::map<std::string, float>> result = vad.GetResult();
for (auto& res : result) {
std::cout << "speak start: " << res["start"]
<< " s, end: " << res["end"] << " s | ";
}
std::cout << "\b\b " << std::endl;
vad.Reset();
return 0;
}
@ -0,0 +1,306 @@
// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "vad.h"
#include <cstring>
#include <iomanip>
#ifdef NDEBUG
#define LOG_DEBUG \
::fastdeploy::FDLogger(true, "[DEBUG]") << __REL_FILE__ << "(" << __LINE__ \
<< ")::" << __FUNCTION__ << "\t"
#else
#define LOG_DEBUG \
::fastdeploy::FDLogger(false, "[DEBUG]") \
<< __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t"
#endif
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption&
custom_option /* = fastdeploy::RuntimeOption() */) {
valid_cpu_backends = {fastdeploy::Backend::ORT,
fastdeploy::Backend::OPENVINO};
valid_gpu_backends = {fastdeploy::Backend::ORT, fastdeploy::Backend::TRT};
runtime_option = custom_option;
// ORT backend
runtime_option.UseCpu();
runtime_option.UseOrtBackend();
runtime_option.model_format = fastdeploy::ModelFormat::ONNX;
// graph opt level
runtime_option.ort_option.graph_optimization_level = 99;
// one-thread
runtime_option.ort_option.intra_op_num_threads = 1;
runtime_option.ort_option.inter_op_num_threads = 1;
// model path
runtime_option.model_file = model_file;
}
void Vad::Init() {
std::call_once(init_, [&]() { initialized = Initialize(); });
}
std::string Vad::ModelName() const { return "VAD"; }
void Vad::SetConfig(int sr,
int frame_ms,
float threshold,
int min_silence_duration_ms,
int speech_pad_left_ms,
int speech_pad_right_ms) {
if (initialized) {
fastdeploy::FDERROR << "SetConfig must be called before init"
<< std::endl;
throw std::runtime_error("SetConfig must be called before init");
}
sample_rate_ = sr;
sr_per_ms_ = sr / 1000;
threshold_ = threshold;
frame_ms_ = frame_ms;
min_silence_samples_ = min_silence_duration_ms * sr_per_ms_;
speech_pad_left_samples_ = speech_pad_left_ms * sr_per_ms_;
speech_pad_right_samples_ = speech_pad_right_ms * sr_per_ms_;
// init chunk size
window_size_samples_ = frame_ms * sr_per_ms_;
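// e.g. at 16 kHz, sr_per_ms_ = 16, so a 32 ms frame gives a 512-sample window.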
current_chunk_size_ = window_size_samples_;
fastdeploy::FDINFO << "sr=" << sr << " threshold=" << threshold
<< " frame_ms=" << frame_ms
<< " min_silence_duration_ms=" << min_silence_duration_ms
<< " speech_pad_left_ms=" << speech_pad_left_ms
<< " speech_pad_right_ms=" << speech_pad_right_ms;
}
void Vad::Reset() {
std::memset(h_.data(), 0.0f, h_.size() * sizeof(float));
std::memset(c_.data(), 0.0f, c_.size() * sizeof(float));
triggerd_ = false;
temp_end_ = 0;
current_sample_ = 0;
speakStart_.clear();
speakEnd_.clear();
states_.clear();
}
bool Vad::Initialize() {
// input & output holder
inputTensors_.resize(4);
outputTensors_.resize(3);
// input shape
input_node_dims_.emplace_back(1);
input_node_dims_.emplace_back(window_size_samples_);
// sr buffer
sr_.resize(1);
sr_[0] = sample_rate_;
// hidden state buffer
h_.resize(size_hc_);
c_.resize(size_hc_);
Reset();
// InitRuntime
if (!InitRuntime()) {
fastdeploy::FDERROR << "Failed to initialize fastdeploy backend."
<< std::endl;
return false;
}
fastdeploy::FDINFO << "init done.";
return true;
}
bool Vad::ForwardChunk(std::vector<float>& chunk) {
// last chunk may not be window_size_samples_
input_node_dims_.back() = chunk.size();
assert(window_size_samples_ >= chunk.size());
current_chunk_size_ = chunk.size();
inputTensors_[0].name = "input";
inputTensors_[0].SetExternalData(
input_node_dims_, fastdeploy::FDDataType::FP32, chunk.data());
inputTensors_[1].name = "sr";
inputTensors_[1].SetExternalData(
sr_node_dims_, fastdeploy::FDDataType::INT64, sr_.data());
inputTensors_[2].name = "h";
inputTensors_[2].SetExternalData(
hc_node_dims_, fastdeploy::FDDataType::FP32, h_.data());
inputTensors_[3].name = "c";
inputTensors_[3].SetExternalData(
hc_node_dims_, fastdeploy::FDDataType::FP32, c_.data());
if (!Infer(inputTensors_, &outputTensors_)) {
return false;
}
// Push forward sample index
current_sample_ += current_chunk_size_;
return true;
}
const Vad::State& Vad::Postprocess() {
// update prob, h, c
outputProb_ = *(float*)outputTensors_[0].Data();
auto* hn = static_cast<float*>(outputTensors_[1].MutableData());
std::memcpy(h_.data(), hn, h_.size() * sizeof(float));
auto* cn = static_cast<float*>(outputTensors_[2].MutableData());
std::memcpy(c_.data(), cn, c_.size() * sizeof(float));
if (outputProb_ < threshold_ && !triggerd_) {
// 1. Silence
LOG_DEBUG << "{ silence: " << 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
states_.emplace_back(Vad::State::SIL);
} else if (outputProb_ >= threshold_ && !triggerd_) {
// 2. Start
triggerd_ = true;
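// current_sample_ already includes this chunk, so step back one chunk
// (plus the left padding) to locate the start of the speech segment.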
speech_start_ =
current_sample_ - current_chunk_size_ - speech_pad_left_samples_;
float start_sec = 1.0 * speech_start_ / sample_rate_;
speakStart_.emplace_back(start_sec);
LOG_DEBUG << "{ speech start: " << start_sec
<< " s; prob: " << outputProb_ << " }";
states_.emplace_back(Vad::State::START);
} else if (outputProb_ >= threshold_ - 0.15 && triggerd_) {
// 3. Continue
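// Hysteresis: once speech is active, a relaxed threshold (threshold_ - 0.15)
// keeps the segment open through brief dips in the speech probability.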
if (temp_end_ != 0) {
// speech prob relaxation, speech continues again
LOG_DEBUG << "{ speech fake end(sil < min_silence_ms) to continue: "
<< 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
temp_end_ = 0;
} else {
// speech prob relaxation, keep tracking speech
LOG_DEBUG << "{ speech continue: "
<< 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
}
states_.emplace_back(Vad::State::SPEECH);
} else if (outputProb_ < threshold_ - 0.15 && triggerd_) {
// 4. End
if (temp_end_ == 0) {
temp_end_ = current_sample_;
}
// check possible speech end
if (current_sample_ - temp_end_ < min_silence_samples_) {
// a. silence < min_silence_samples, continue speaking
LOG_DEBUG << "{ speech fake end(sil < min_silence_ms): "
<< 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
states_.emplace_back(Vad::State::SIL);
} else {
// b. silence >= min_silence_samples, end speaking
speech_end_ = current_sample_ + speech_pad_right_samples_;
temp_end_ = 0;
triggerd_ = false;
auto end_sec = 1.0 * speech_end_ / sample_rate_;
speakEnd_.emplace_back(end_sec);
LOG_DEBUG << "{ speech end: " << end_sec
<< " s; prob: " << outputProb_ << " }";
states_.emplace_back(Vad::State::END);
}
}
return states_.back();
}
const std::vector<std::map<std::string, float>> Vad::GetResult(
float removeThreshold,
float expandHeadThreshold,
float expandTailThreshold,
float mergeThreshold) const {
float audioLength = 1.0 * current_sample_ / sample_rate_;
if (speakStart_.empty() && speakEnd_.empty()) {
return {};
}
if (speakEnd_.size() != speakStart_.size()) {
// set the audio length as the last end
speakEnd_.emplace_back(audioLength);
}
// Remove too short segments
// auto startIter = speakStart_.begin();
// auto endIter = speakEnd_.begin();
// while (startIter != speakStart_.end()) {
// if (removeThreshold < audioLength &&
// *endIter - *startIter < removeThreshold) {
// startIter = speakStart_.erase(startIter);
// endIter = speakEnd_.erase(endIter);
// } else {
// startIter++;
// endIter++;
// }
// }
// // Expand to avoid to tight cut.
// startIter = speakStart_.begin();
// endIter = speakEnd_.begin();
// *startIter = std::fmax(0.f, *startIter - expandHeadThreshold);
// *endIter = std::fmin(*endIter + expandTailThreshold, *(startIter + 1));
// endIter = speakEnd_.end() - 1;
// startIter = speakStart_.end() - 1;
// *startIter = fmax(*startIter - expandHeadThreshold, *(endIter - 1));
// *endIter = std::fmin(*endIter + expandTailThreshold, audioLength);
// for (int i = 1; i < speakStart_.size() - 1; ++i) {
// speakStart_[i] = std::fmax(speakStart_[i] - expandHeadThreshold,
// speakEnd_[i - 1]);
// speakEnd_[i] = std::fmin(speakEnd_[i] + expandTailThreshold,
// speakStart_[i + 1]);
// }
// // Merge very closed segments
// startIter = speakStart_.begin() + 1;
// endIter = speakEnd_.begin();
// while (startIter != speakStart_.end()) {
// if (*startIter - *endIter < mergeThreshold) {
// startIter = speakStart_.erase(startIter);
// endIter = speakEnd_.erase(endIter);
// } else {
// startIter++;
// endIter++;
// }
// }
std::vector<std::map<std::string, float>> result;
for (int i = 0; i < speakStart_.size(); ++i) {
result.emplace_back(std::map<std::string, float>(
{{"start", speakStart_[i]}, {"end", speakEnd_[i]}}));
}
return result;
}
std::ostream& operator<<(std::ostream& os, const Vad::State& s) {
switch (s) {
case Vad::State::SIL:
os << "[SIL]";
break;
case Vad::State::START:
os << "[STA]";
break;
case Vad::State::SPEECH:
os << "[SPE]";
break;
case Vad::State::END:
os << "[END]";
break;
default:
// illegal state
os << "[ILL]";
break;
}
return os;
}
@ -0,0 +1,124 @@
// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <mutex>
#include <string>
#include <vector>
#include "./wav.h"
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/runtime.h"
class Vad : public fastdeploy::FastDeployModel {
public:
enum class State { SIL = 0, START, SPEECH, END };
friend std::ostream& operator<<(std::ostream& os, const Vad::State& s);
Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option =
fastdeploy::RuntimeOption());
void Init();
void Reset();
void SetConfig(int sr,
int frame_ms,
float threshold,
int min_silence_duration_ms,
int speech_pad_left_ms,
int speech_pad_right_ms);
bool ForwardChunk(std::vector<float>& chunk);
const State& Postprocess();
const std::vector<std::map<std::string, float>> GetResult(
float removeThreshold = 0.0,
float expandHeadThreshold = 0.0,
float expandTailThreshold = 0,
float mergeThreshold = 0.0) const;
const std::vector<State> GetStates() const { return states_; }
int SampleRate() const { return sample_rate_; }
int FrameMs() const { return frame_ms_; }
int64_t WindowSizeSamples() const { return window_size_samples_; }
float Threshold() const { return threshold_; }
int MinSilenceDurationMs() const {
return min_silence_samples_ / sample_rate_;
}
int SpeechPadLeftMs() const {
return speech_pad_left_samples_ / sample_rate_;
}
int SpeechPadRightMs() const {
return speech_pad_right_samples_ / sample_rate_;
}
int MinSilenceSamples() const { return min_silence_samples_; }
int SpeechPadLeftSamples() const { return speech_pad_left_samples_; }
int SpeechPadRightSamples() const { return speech_pad_right_samples_; }
std::string ModelName() const override;
private:
bool Initialize();
private:
std::once_flag init_;
// input and output
std::vector<fastdeploy::FDTensor> inputTensors_;
std::vector<fastdeploy::FDTensor> outputTensors_;
// model states
bool triggerd_ = false;
unsigned int speech_start_ = 0;
unsigned int speech_end_ = 0;
unsigned int temp_end_ = 0;
unsigned int current_sample_ = 0;
unsigned int current_chunk_size_ = 0;
// MAX 4294967295 samples / 8 samples per ms / 1000 / 60 = 8947 minutes
float outputProb_;
std::vector<float> speakStart_;
mutable std::vector<float> speakEnd_;
std::vector<State> states_;
/* ========================================================================
*/
int sample_rate_ = 16000;
int frame_ms_ = 32; // 32, 64, 96 for 16k
float threshold_ = 0.5f;
int64_t window_size_samples_; // support 256 512 768 for 8k; 512 1024 1536
// for 16k.
int sr_per_ms_; // support 8 or 16
int min_silence_samples_; // sr_per_ms_ * frame_ms_
int speech_pad_left_samples_{0}; // usually 250ms
int speech_pad_right_samples_{0}; // usually 0
/* ========================================================================
*/
std::vector<int64_t> sr_;
const size_t size_hc_ = 2 * 1 * 64; // It's FIXED.
std::vector<float> h_;
std::vector<float> c_;
std::vector<int64_t> input_node_dims_;
const std::vector<int64_t> sr_node_dims_ = {1};
const std::vector<int64_t> hc_node_dims_ = {2, 1, 64};
};
@ -0,0 +1,197 @@
// Copyright (c) 2016 Personal (Binbin Zhang)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <string>
namespace wav {
struct WavHeader {
char riff[4]; // "riff"
unsigned int size;
char wav[4]; // "WAVE"
char fmt[4]; // "fmt "
unsigned int fmt_size;
uint16_t format;
uint16_t channels;
unsigned int sample_rate;
unsigned int bytes_per_second;
uint16_t block_size;
uint16_t bit;
char data[4]; // "data"
unsigned int data_size;
};
class WavReader {
public:
WavReader() : data_(nullptr) {}
explicit WavReader(const std::string& filename) { Open(filename); }
bool Open(const std::string& filename) {
FILE* fp = fopen(filename.c_str(), "rb");
if (NULL == fp) {
std::cout << "Error in read " << filename;
return false;
}
WavHeader header;
fread(&header, 1, sizeof(header), fp);
if (header.fmt_size < 16) {
fprintf(stderr,
"WaveData: expect PCM format data "
"to have fmt chunk of at least size 16.\n");
return false;
} else if (header.fmt_size > 16) {
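// The next chunk header begins 12 (RIFF) + 8 ("fmt " header) + fmt_size
// bytes into the file, i.e. 44 - 8 + fmt_size - 16; seek there and read it.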
int offset = 44 - 8 + header.fmt_size - 16;
fseek(fp, offset, SEEK_SET);
fread(header.data, 8, sizeof(char), fp);
}
// check "riff" "WAVE" "fmt " "data"
// Skip any sub-chunks between "fmt" and "data". Usually there will
// be a single "fact" sub chunk, but on Windows there can also be a
// "list" sub chunk.
while (0 != strncmp(header.data, "data", 4)) {
// We will just ignore the data in these chunks.
fseek(fp, header.data_size, SEEK_CUR);
// read next sub chunk
fread(header.data, 8, sizeof(char), fp);
}
num_channel_ = header.channels;
sample_rate_ = header.sample_rate;
bits_per_sample_ = header.bit;
int num_data = header.data_size / (bits_per_sample_ / 8);
data_ = new float[num_data]; // Create 1-dim array
num_samples_ = num_data / num_channel_;
for (int i = 0; i < num_data; ++i) {
switch (bits_per_sample_) {
case 8: {
char sample;
fread(&sample, 1, sizeof(char), fp);
data_[i] = static_cast<float>(sample);
break;
}
case 16: {
int16_t sample;
fread(&sample, 1, sizeof(int16_t), fp);
// std::cout << sample;
data_[i] = static_cast<float>(sample);
// std::cout << data_[i];
break;
}
case 32: {
int sample;
fread(&sample, 1, sizeof(int), fp);
data_[i] = static_cast<float>(sample);
break;
}
default:
fprintf(stderr, "unsupported quantization bits");
exit(1);
}
}
fclose(fp);
return true;
}
int num_channel() const { return num_channel_; }
int sample_rate() const { return sample_rate_; }
int bits_per_sample() const { return bits_per_sample_; }
int num_samples() const { return num_samples_; }
const float* data() const { return data_; }
private:
int num_channel_;
int sample_rate_;
int bits_per_sample_;
int num_samples_; // sample points per channel
float* data_;
};
class WavWriter {
public:
WavWriter(const float* data,
int num_samples,
int num_channel,
int sample_rate,
int bits_per_sample)
: data_(data),
num_samples_(num_samples),
num_channel_(num_channel),
sample_rate_(sample_rate),
bits_per_sample_(bits_per_sample) {}
void Write(const std::string& filename) {
FILE* fp = fopen(filename.c_str(), "w");
// init char 'riff' 'WAVE' 'fmt ' 'data'
WavHeader header;
char wav_header[44] = {
0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57, 0x41, 0x56,
0x45, 0x66, 0x6d, 0x74, 0x20, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00};
memcpy(&header, wav_header, sizeof(header));
header.channels = num_channel_;
header.bit = bits_per_sample_;
header.sample_rate = sample_rate_;
header.data_size = num_samples_ * num_channel_ * (bits_per_sample_ / 8);
header.size = sizeof(header) - 8 + header.data_size;
header.bytes_per_second =
sample_rate_ * num_channel_ * (bits_per_sample_ / 8);
header.block_size = num_channel_ * (bits_per_sample_ / 8);
fwrite(&header, 1, sizeof(header), fp);
for (int i = 0; i < num_samples_; ++i) {
for (int j = 0; j < num_channel_; ++j) {
switch (bits_per_sample_) {
case 8: {
char sample =
static_cast<char>(data_[i * num_channel_ + j]);
fwrite(&sample, 1, sizeof(sample), fp);
break;
}
case 16: {
int16_t sample =
static_cast<int16_t>(data_[i * num_channel_ + j]);
fwrite(&sample, 1, sizeof(sample), fp);
break;
}
case 32: {
int sample =
static_cast<int>(data_[i * num_channel_ + j]);
fwrite(&sample, 1, sizeof(sample), fp);
break;
}
}
}
}
fclose(fp);
}
private:
const float* data_;
int num_samples_; // total float points in data_
int num_channel_;
int sample_rate_;
int bits_per_sample_;
};
} // namespace wav