From 29af6bde4108e7357607140aa3a0f39aeace1ad1 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Tue, 28 Feb 2023 02:30:52 +0000 Subject: [PATCH] opt to compile asr,cls,vad; add vad; format code --- runtime/.gitignore | 3 + runtime/CMakeLists.txt | 177 +- runtime/build.sh | 2 +- runtime/cmake/fastdeploy.cmake | 15 +- runtime/engine/CMakeLists.txt | 19 +- .../engine/asr/recognizer/u2_recognizer.cc | 3 +- runtime/engine/common/CMakeLists.txt | 2 +- runtime/engine/common/base/CMakeLists.txt | 20 + .../common/base/{flags.h => flags.h.in} | 2 +- .../engine/common/base/{log.h => log.h.in} | 2 +- runtime/engine/common/frontend/cmvn.cc | 2 +- .../engine/common/frontend/feature-fbank.h | 1 + .../engine/common/frontend/feature-window.cc | 1 + runtime/engine/common/frontend/rfft.cc | 4 +- .../engine/common/matrix/kaldi-matrix-inl.h | 37 +- runtime/engine/common/matrix/kaldi-matrix.cc | 1396 ++++++++------- runtime/engine/common/matrix/kaldi-matrix.h | 1443 ++++++++------- .../engine/common/matrix/kaldi-vector-inl.h | 39 +- runtime/engine/common/matrix/kaldi-vector.cc | 1592 +++++++++-------- runtime/engine/common/matrix/kaldi-vector.h | 533 +++--- runtime/engine/common/matrix/matrix-common.h | 72 +- runtime/engine/kaldi/CMakeLists.txt | 15 +- runtime/engine/kaldi/base/kaldi-types.h | 12 + runtime/engine/vad/CMakeLists.txt | 18 + runtime/engine/vad/README.md | 121 ++ runtime/engine/vad/README_CN.md | 119 ++ runtime/engine/vad/infer_onnx_silero_vad.cc | 65 + runtime/engine/vad/vad.cc | 306 ++++ runtime/engine/vad/vad.h | 124 ++ runtime/engine/vad/wav.h | 197 ++ 30 files changed, 3811 insertions(+), 2531 deletions(-) create mode 100644 runtime/engine/common/base/CMakeLists.txt rename runtime/engine/common/base/{flags.h => flags.h.in} (96%) rename runtime/engine/common/base/{log.h => log.h.in} (96%) create mode 100644 runtime/engine/vad/CMakeLists.txt create mode 100644 runtime/engine/vad/README.md create mode 100644 runtime/engine/vad/README_CN.md create mode 100644 runtime/engine/vad/infer_onnx_silero_vad.cc create mode 100644 runtime/engine/vad/vad.cc create mode 100644 runtime/engine/vad/vad.h create mode 100644 runtime/engine/vad/wav.h diff --git a/runtime/.gitignore b/runtime/.gitignore index 0783b138c..9aa98ef7d 100644 --- a/runtime/.gitignore +++ b/runtime/.gitignore @@ -1,3 +1,6 @@ +engine/common/base/flags.h +engine/common/base/log.h + tools/valgrind* *log fc_patch/* diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 44ee3a589..015a1088d 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -20,8 +20,7 @@ project(paddlespeech VERSION 0.1) set(CMAKE_VERBOSE_MAKEFILE on) -# set std-14 -set(CMAKE_CXX_STANDARD 14) + include(FetchContent) include(ExternalProject) @@ -31,15 +30,28 @@ set(FETCHCONTENT_QUIET off) get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}") set(FETCHCONTENT_BASE_DIR ${fc_patch}) +set(CMAKE_CXX_FLAGS) +set(CMAKE_CXX_FLAGS_DEBUG) +set(CMAKE_CXX_FLAGS_RELEASE) + +# set std-14 +set(CMAKE_CXX_STANDARD 14) + # compiler option # Keep the same with openfst, -fPIC or -fpic set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ldl") SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ggdb") SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall") + +add_compile_options(-fPIC) ############################################################################### # Option Configurations ############################################################################### +option(WITH_ASR "build asr" ON) +option(WITH_CLS "build cls" ON) +option(WITH_VAD "build vad" ON) + option(TEST_DEBUG "option for debug" OFF) option(USE_PROFILING "enable c++ profling" OFF) option(WITH_TESTING "unit test" ON) @@ -47,102 +59,117 @@ option(WITH_TESTING "unit test" ON) option(USING_GPU "u2 compute on GPU." OFF) ############################################################################### -# Include third party +# Include Third Party ############################################################################### include(gflags) include(glog) -# openfst -include(openfst) -add_dependencies(openfst gflags glog) - -# paddle lib -include(paddleinference) - # gtest if(WITH_TESTING) include(gtest) # download, build, install gtest endif() + +# fastdeploy +include(fastdeploy) + +if(WITH_ASR) + # openfst + include(openfst) + add_dependencies(openfst gflags glog) +endif() + +############################################################################### +# Find Package +############################################################################### + # python/pybind11/threads find_package(Threads REQUIRED) # https://cmake.org/cmake/help/latest/module/FindPython3.html#module:FindPython3 find_package(Python3 COMPONENTS Interpreter Development) find_package(pybind11 CONFIG) -if(Python3_FOUND) - message(STATUS "Python3_FOUND = ${Python3_FOUND}") - message(STATUS "Python3_EXECUTABLE = ${Python3_EXECUTABLE}") - message(STATUS "Python3_LIBRARIES = ${Python3_LIBRARIES}") - message(STATUS "Python3_INCLUDE_DIRS = ${Python3_INCLUDE_DIRS}") - message(STATUS "Python3_LINK_OPTIONS = ${Python3_LINK_OPTIONS}") - set(PYTHON_LIBRARIES ${Python3_LIBRARIES} CACHE STRING "python lib" FORCE) - set(PYTHON_INCLUDE_DIR ${Python3_INCLUDE_DIRS} CACHE STRING "python inc" FORCE) -endif() - -message(STATUS "PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}") -message(STATUS "PYTHON_INCLUDE_DIR = ${PYTHON_INCLUDE_DIR}") -if(pybind11_FOUND) - message(STATUS "pybind11_INCLUDES = ${pybind11_INCLUDE_DIRS}") - message(STATUS "pybind11_LIBRARIES=${pybind11_LIBRARIES}") - message(STATUS "pybind11_DEFINITIONS=${pybind11_DEFINITIONS}") +if(WITH_ASR) + if(Python3_FOUND) + message(STATUS "Python3_FOUND = ${Python3_FOUND}") + message(STATUS "Python3_EXECUTABLE = ${Python3_EXECUTABLE}") + message(STATUS "Python3_LIBRARIES = ${Python3_LIBRARIES}") + message(STATUS "Python3_INCLUDE_DIRS = ${Python3_INCLUDE_DIRS}") + message(STATUS "Python3_LINK_OPTIONS = ${Python3_LINK_OPTIONS}") + set(PYTHON_LIBRARIES ${Python3_LIBRARIES} CACHE STRING "python lib" FORCE) + set(PYTHON_INCLUDE_DIR ${Python3_INCLUDE_DIRS} CACHE STRING "python inc" FORCE) + endif() + + message(STATUS "PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}") + message(STATUS "PYTHON_INCLUDE_DIR = ${PYTHON_INCLUDE_DIR}") + + if(pybind11_FOUND) + message(STATUS "pybind11_INCLUDES = ${pybind11_INCLUDE_DIRS}") + message(STATUS "pybind11_LIBRARIES=${pybind11_LIBRARIES}") + message(STATUS "pybind11_DEFINITIONS=${pybind11_DEFINITIONS}") + endif() + + + # paddle libpaddle.so + # paddle include and link option + # -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so + execute_process( + COMMAND python -c "\ + import os;\ + import paddle;\ + include_dir=paddle.sysconfig.get_include();\ + paddle_dir=os.path.split(include_dir)[0];\ + libs_dir=os.path.join(paddle_dir, 'libs');\ + fluid_dir=os.path.join(paddle_dir, 'fluid');\ + out=' '.join([\"-L\" + libs_dir, \"-L\" + fluid_dir]);\ + out += \" -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so\"; print(out);\ + " + OUTPUT_VARIABLE PADDLE_LINK_FLAGS + RESULT_VARIABLE SUCESS) + + message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS}) + string(STRIP ${PADDLE_LINK_FLAGS} PADDLE_LINK_FLAGS) + + # paddle compile option + # -I/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/include + execute_process( + COMMAND python -c "\ + import paddle; \ + include_dir = paddle.sysconfig.get_include(); \ + print(f\"-I{include_dir}\"); \ + " + OUTPUT_VARIABLE PADDLE_COMPILE_FLAGS) + message(STATUS PADDLE_COMPILE_FLAGS= ${PADDLE_COMPILE_FLAGS}) + string(STRIP ${PADDLE_COMPILE_FLAGS} PADDLE_COMPILE_FLAGS) + + + # for LD_LIBRARY_PATH + # set(PADDLE_LIB_DIRS /workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid:/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/libs/) + execute_process( + COMMAND python -c "\ + import os; \ + import paddle; \ + include_dir=paddle.sysconfig.get_include(); \ + paddle_dir=os.path.split(include_dir)[0]; \ + libs_dir=os.path.join(paddle_dir, 'libs'); \ + fluid_dir=os.path.join(paddle_dir, 'fluid'); \ + out=':'.join([libs_dir, fluid_dir]); print(out); \ + " + OUTPUT_VARIABLE PADDLE_LIB_DIRS) + message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS}) endif() -# paddle libpaddle.so -# paddle include and link option -# -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so -execute_process( - COMMAND python -c "\ -import os;\ -import paddle;\ -include_dir=paddle.sysconfig.get_include();\ -paddle_dir=os.path.split(include_dir)[0];\ -libs_dir=os.path.join(paddle_dir, 'libs');\ -fluid_dir=os.path.join(paddle_dir, 'fluid');\ -out=' '.join([\"-L\" + libs_dir, \"-L\" + fluid_dir]);\ -out += \" -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so\"; print(out);\ - " - OUTPUT_VARIABLE PADDLE_LINK_FLAGS - RESULT_VARIABLE SUCESS) - -message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS}) -string(STRIP ${PADDLE_LINK_FLAGS} PADDLE_LINK_FLAGS) - -# paddle compile option -# -I/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/include -execute_process( - COMMAND python -c "\ -import paddle; \ -include_dir = paddle.sysconfig.get_include(); \ -print(f\"-I{include_dir}\"); \ - " - OUTPUT_VARIABLE PADDLE_COMPILE_FLAGS) -message(STATUS PADDLE_COMPILE_FLAGS= ${PADDLE_COMPILE_FLAGS}) -string(STRIP ${PADDLE_COMPILE_FLAGS} PADDLE_COMPILE_FLAGS) - - -# for LD_LIBRARY_PATH -# set(PADDLE_LIB_DIRS /workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid:/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/libs/) -execute_process( - COMMAND python -c "\ -import os; \ -import paddle; \ -include_dir=paddle.sysconfig.get_include(); \ -paddle_dir=os.path.split(include_dir)[0]; \ -libs_dir=os.path.join(paddle_dir, 'libs'); \ -fluid_dir=os.path.join(paddle_dir, 'fluid'); \ -out=':'.join([libs_dir, fluid_dir]); print(out); \ - " - OUTPUT_VARIABLE PADDLE_LIB_DIRS) -message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS}) - -add_compile_options(-fPIC) ############################################################################### # Add local library ############################################################################### set(ENGINE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/engine) +message(STATUS "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}") +message(STATUS "CMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}") +message(STATUS "CMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}") + + add_subdirectory(engine) diff --git a/runtime/build.sh b/runtime/build.sh index 94d250f5a..131fb7f1a 100755 --- a/runtime/build.sh +++ b/runtime/build.sh @@ -4,5 +4,5 @@ set -xe # the build script had verified in the paddlepaddle docker image. # please follow the instruction below to install PaddlePaddle image. # https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html -cmake -B build +cmake -B build -DWITH_ASR=OFF -DWITH_CLS=OFF cmake --build build -j diff --git a/runtime/cmake/fastdeploy.cmake b/runtime/cmake/fastdeploy.cmake index 773414c19..cb9ceacda 100644 --- a/runtime/cmake/fastdeploy.cmake +++ b/runtime/cmake/fastdeploy.cmake @@ -8,11 +8,11 @@ windows_x86") set(CMAKE_VERBOSE_MAKEFILE ON) set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy) -if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz) +if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz) exec_program("mkdir -p ${FASTDEPLOY_DIR} && - wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz -P ${FASTDEPLOY_DIR} && - tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz -C ${FASTDEPLOY_DIR} && - mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2 ${FASTDEPLOY_DIR}/linux-x64") + wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz -P ${FASTDEPLOY_DIR} && + tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz -C ${FASTDEPLOY_DIR} && + mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4 ${FASTDEPLOY_DIR}/linux-x64") endif() if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz) @@ -36,4 +36,9 @@ elseif (ARCH STREQUAL "android_armv7") endif() include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) -include_directories(${FASTDEPLOY_INCS}) \ No newline at end of file + +# fix compiler flags conflict, since fastdeploy using c++11 for project +set(CMAKE_CXX_STANDARD 14) + +include_directories(${FASTDEPLOY_INCS}) +message(STATUS "FASTDEPLOY_INCS=${FASTDEPLOY_INCS}") \ No newline at end of file diff --git a/runtime/engine/CMakeLists.txt b/runtime/engine/CMakeLists.txt index 42399fe92..242a579b5 100644 --- a/runtime/engine/CMakeLists.txt +++ b/runtime/engine/CMakeLists.txt @@ -6,8 +6,19 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kaldi) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common) -add_subdirectory(asr) -add_subdirectory(common) add_subdirectory(kaldi) -add_subdirectory(codelab) -add_subdirectory(cls) \ No newline at end of file +add_subdirectory(common) + +if(WITH_ASR) + add_subdirectory(asr) +endif() + +if(WITH_CLS) + add_subdirectory(cls) +endif() + +if(WITH_VAD) + add_subdirectory(vad) +endif() + +add_subdirectory(codelab) \ No newline at end of file diff --git a/runtime/engine/asr/recognizer/u2_recognizer.cc b/runtime/engine/asr/recognizer/u2_recognizer.cc index da1348f55..36fecb0a2 100644 --- a/runtime/engine/asr/recognizer/u2_recognizer.cc +++ b/runtime/engine/asr/recognizer/u2_recognizer.cc @@ -38,7 +38,8 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource) decoder_ = std::make_unique( resource.vocab_path, resource.decoder_opts.ctc_prefix_search_opts); } else { - decoder_ = std::make_unique(resource.decoder_opts.tlg_decoder_opts); + decoder_ = std::make_unique( + resource.decoder_opts.tlg_decoder_opts); } symbol_table_ = decoder_->WordSymbolTable(); diff --git a/runtime/engine/common/CMakeLists.txt b/runtime/engine/common/CMakeLists.txt index 5e0a7d573..4f399eea8 100644 --- a/runtime/engine/common/CMakeLists.txt +++ b/runtime/engine/common/CMakeLists.txt @@ -3,7 +3,7 @@ ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../ ) add_subdirectory(utils) - +add_subdirectory(base) add_subdirectory(matrix) include_directories( diff --git a/runtime/engine/common/base/CMakeLists.txt b/runtime/engine/common/base/CMakeLists.txt new file mode 100644 index 000000000..ab7108748 --- /dev/null +++ b/runtime/engine/common/base/CMakeLists.txt @@ -0,0 +1,20 @@ +if(WITH_ASR) + add_compile_options(-DWITH_ASR) + set(PPS_FLAGS_LIB "fst/flags.h") + set(PPS_GLOB_LIB "fst/log.h") +else() + set(PPS_FLAGS_LIB "gflags/gflags.h") + set(PPS_GLOB_LIB "glog/logging.h") +endif() + +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/flags.h.in + ${CMAKE_CURRENT_SOURCE_DIR}/flags.h @ONLY + ) +message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/flags.h") + +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/log.h.in + ${CMAKE_CURRENT_SOURCE_DIR}/log.h @ONLY + ) +message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/log.h") \ No newline at end of file diff --git a/runtime/engine/common/base/flags.h b/runtime/engine/common/base/flags.h.in similarity index 96% rename from runtime/engine/common/base/flags.h rename to runtime/engine/common/base/flags.h.in index 41df0d452..161366e8a 100644 --- a/runtime/engine/common/base/flags.h +++ b/runtime/engine/common/base/flags.h.in @@ -14,4 +14,4 @@ #pragma once -#include "fst/flags.h" +#include "@PPS_FLAGS_LIB@" \ No newline at end of file diff --git a/runtime/engine/common/base/log.h b/runtime/engine/common/base/log.h.in similarity index 96% rename from runtime/engine/common/base/log.h rename to runtime/engine/common/base/log.h.in index c613b98c3..0dd588bc4 100644 --- a/runtime/engine/common/base/log.h +++ b/runtime/engine/common/base/log.h.in @@ -14,4 +14,4 @@ #pragma once -#include "fst/log.h" +#include "@PPS_GLOB_LIB@" diff --git a/runtime/engine/common/frontend/cmvn.cc b/runtime/engine/common/frontend/cmvn.cc index 8375d3d16..0f1108208 100644 --- a/runtime/engine/common/frontend/cmvn.cc +++ b/runtime/engine/common/frontend/cmvn.cc @@ -33,7 +33,7 @@ CMVN::CMVN(std::string cmvn_file, unique_ptr base_extractor) dim_ = mean_stats_.size() - 1; } -void CMVN::ReadCMVNFromJson(string cmvn_file) { +void CMVN::ReadCMVNFromJson(std::string cmvn_file) { std::string json_str = ppspeech::ReadFile2String(cmvn_file); picojson::value value; std::string err; diff --git a/runtime/engine/common/frontend/feature-fbank.h b/runtime/engine/common/frontend/feature-fbank.h index 30085245d..3dab793f3 100644 --- a/runtime/engine/common/frontend/feature-fbank.h +++ b/runtime/engine/common/frontend/feature-fbank.h @@ -21,6 +21,7 @@ #ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_ #define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_ +#include #include #include "frontend/feature-window.h" diff --git a/runtime/engine/common/frontend/feature-window.cc b/runtime/engine/common/frontend/feature-window.cc index 1c474ccb5..43c736e01 100644 --- a/runtime/engine/common/frontend/feature-window.cc +++ b/runtime/engine/common/frontend/feature-window.cc @@ -7,6 +7,7 @@ #include "frontend/feature-window.h" #include +#include #include #ifndef M_2PI diff --git a/runtime/engine/common/frontend/rfft.cc b/runtime/engine/common/frontend/rfft.cc index f0a3ebc7a..8cdb634ff 100644 --- a/runtime/engine/common/frontend/rfft.cc +++ b/runtime/engine/common/frontend/rfft.cc @@ -17,12 +17,12 @@ */ #include "frontend/rfft.h" +#include "base/log.h" #include +#include #include -#include "base/log.h" - // see fftsg.c #ifdef __cplusplus extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w); diff --git a/runtime/engine/common/matrix/kaldi-matrix-inl.h b/runtime/engine/common/matrix/kaldi-matrix-inl.h index eafbc6fb4..ed18859de 100644 --- a/runtime/engine/common/matrix/kaldi-matrix-inl.h +++ b/runtime/engine/common/matrix/kaldi-matrix-inl.h @@ -25,40 +25,41 @@ namespace kaldi { /// Empty constructor -template -Matrix::Matrix(): MatrixBase(NULL, 0, 0, 0) { } +template +Matrix::Matrix() : MatrixBase(NULL, 0, 0, 0) {} /* template<> template<> -void MatrixBase::AddVecVec(const float alpha, const VectorBase &ra, const VectorBase &rb); +void MatrixBase::AddVecVec(const float alpha, const VectorBase +&ra, const VectorBase &rb); template<> template<> -void MatrixBase::AddVecVec(const double alpha, const VectorBase &ra, const VectorBase &rb); +void MatrixBase::AddVecVec(const double alpha, const VectorBase +&ra, const VectorBase &rb); */ -template -inline std::ostream & operator << (std::ostream & os, const MatrixBase & M) { - M.Write(os, false); - return os; +template +inline std::ostream& operator<<(std::ostream& os, const MatrixBase& M) { + M.Write(os, false); + return os; } -template -inline std::istream & operator >> (std::istream & is, Matrix & M) { - M.Read(is, false); - return is; +template +inline std::istream& operator>>(std::istream& is, Matrix& M) { + M.Read(is, false); + return is; } -template -inline std::istream & operator >> (std::istream & is, MatrixBase & M) { - M.Read(is, false); - return is; +template +inline std::istream& operator>>(std::istream& is, MatrixBase& M) { + M.Read(is, false); + return is; } -}// namespace kaldi +} // namespace kaldi #endif // KALDI_MATRIX_KALDI_MATRIX_INL_H_ - diff --git a/runtime/engine/common/matrix/kaldi-matrix.cc b/runtime/engine/common/matrix/kaldi-matrix.cc index e446a6bf7..6f65fb0a0 100644 --- a/runtime/engine/common/matrix/kaldi-matrix.cc +++ b/runtime/engine/common/matrix/kaldi-matrix.cc @@ -166,14 +166,19 @@ void MatrixBase::AddMatMat(const Real alpha, const MatrixBase& B, MatrixTransposeType transB, const Real beta) { - KALDI_ASSERT((transA == kNoTrans && transB == kNoTrans && A.num_cols_ == B.num_rows_ && A.num_rows_ == num_rows_ && B.num_cols_ == num_cols_) - || (transA == kTrans && transB == kNoTrans && A.num_rows_ == B.num_rows_ && A.num_cols_ == num_rows_ && B.num_cols_ == num_cols_) - || (transA == kNoTrans && transB == kTrans && A.num_cols_ == B.num_cols_ && A.num_rows_ == num_rows_ && B.num_rows_ == num_cols_) - || (transA == kTrans && transB == kTrans && A.num_rows_ == B.num_cols_ && A.num_cols_ == num_rows_ && B.num_rows_ == num_cols_)); + KALDI_ASSERT((transA == kNoTrans && transB == kNoTrans && A.num_cols_ == +B.num_rows_ && A.num_rows_ == num_rows_ && B.num_cols_ == num_cols_) + || (transA == kTrans && transB == kNoTrans && A.num_rows_ == +B.num_rows_ && A.num_cols_ == num_rows_ && B.num_cols_ == num_cols_) + || (transA == kNoTrans && transB == kTrans && A.num_cols_ == +B.num_cols_ && A.num_rows_ == num_rows_ && B.num_rows_ == num_cols_) + || (transA == kTrans && transB == kTrans && A.num_rows_ == +B.num_cols_ && A.num_cols_ == num_rows_ && B.num_rows_ == num_cols_)); KALDI_ASSERT(&A != this && &B != this); if (num_rows_ == 0) return; cblas_Xgemm(alpha, transA, A.data_, A.num_rows_, A.num_cols_, A.stride_, - transB, B.data_, B.stride_, beta, data_, num_rows_, num_cols_, stride_); + transB, B.data_, B.stride_, beta, data_, num_rows_, num_cols_, +stride_); } @@ -191,7 +196,8 @@ void MatrixBase::SetMatMatDivMat(const MatrixBase& A, id = od * (o / i); /// o / i is either zero or "scale". } else { id = od; /// Just imagine the scale was 1.0. This is somehow true in - /// expectation; anyway, this case should basically never happen so it doesn't + /// expectation; anyway, this case should basically never happen so it +doesn't /// really matter. } (*this)(r, c) = id; @@ -200,25 +206,25 @@ void MatrixBase::SetMatMatDivMat(const MatrixBase& A, } */ -//template -//void MatrixBase::CopyLowerToUpper() { - //KALDI_ASSERT(num_rows_ == num_cols_); - //Real *data = data_; - //MatrixIndexT num_rows = num_rows_, stride = stride_; - //for (int32 i = 0; i < num_rows; i++) - //for (int32 j = 0; j < i; j++) - //data[j * stride + i ] = data[i * stride + j]; +// template +// void MatrixBase::CopyLowerToUpper() { +// KALDI_ASSERT(num_rows_ == num_cols_); +// Real *data = data_; +// MatrixIndexT num_rows = num_rows_, stride = stride_; +// for (int32 i = 0; i < num_rows; i++) +// for (int32 j = 0; j < i; j++) +// data[j * stride + i ] = data[i * stride + j]; //} -//template -//void MatrixBase::CopyUpperToLower() { - //KALDI_ASSERT(num_rows_ == num_cols_); - //Real *data = data_; - //MatrixIndexT num_rows = num_rows_, stride = stride_; - //for (int32 i = 0; i < num_rows; i++) - //for (int32 j = 0; j < i; j++) - //data[i * stride + j] = data[j * stride + i]; +// template +// void MatrixBase::CopyUpperToLower() { +// KALDI_ASSERT(num_rows_ == num_cols_); +// Real *data = data_; +// MatrixIndexT num_rows = num_rows_, stride = stride_; +// for (int32 i = 0; i < num_rows; i++) +// for (int32 j = 0; j < i; j++) +// data[i * stride + j] = data[j * stride + i]; //} /* @@ -263,10 +269,14 @@ void MatrixBase::AddMatSmat(const Real alpha, const MatrixBase &B, MatrixTransposeType transB, const Real beta) { - KALDI_ASSERT((transA == kNoTrans && transB == kNoTrans && A.num_cols_ == B.num_rows_ && A.num_rows_ == num_rows_ && B.num_cols_ == num_cols_) - || (transA == kTrans && transB == kNoTrans && A.num_rows_ == B.num_rows_ && A.num_cols_ == num_rows_ && B.num_cols_ == num_cols_) - || (transA == kNoTrans && transB == kTrans && A.num_cols_ == B.num_cols_ && A.num_rows_ == num_rows_ && B.num_rows_ == num_cols_) - || (transA == kTrans && transB == kTrans && A.num_rows_ == B.num_cols_ && A.num_cols_ == num_rows_ && B.num_rows_ == num_cols_)); + KALDI_ASSERT((transA == kNoTrans && transB == kNoTrans && A.num_cols_ == +B.num_rows_ && A.num_rows_ == num_rows_ && B.num_cols_ == num_cols_) + || (transA == kTrans && transB == kNoTrans && A.num_rows_ == +B.num_rows_ && A.num_cols_ == num_rows_ && B.num_cols_ == num_cols_) + || (transA == kNoTrans && transB == kTrans && A.num_cols_ == +B.num_cols_ && A.num_rows_ == num_rows_ && B.num_rows_ == num_cols_) + || (transA == kTrans && transB == kTrans && A.num_rows_ == +B.num_cols_ && A.num_cols_ == num_rows_ && B.num_rows_ == num_cols_)); KALDI_ASSERT(&A != this && &B != this); // We iterate over the columns of B. @@ -301,10 +311,14 @@ void MatrixBase::AddSmatMat(const Real alpha, const MatrixBase &B, MatrixTransposeType transB, const Real beta) { - KALDI_ASSERT((transA == kNoTrans && transB == kNoTrans && A.num_cols_ == B.num_rows_ && A.num_rows_ == num_rows_ && B.num_cols_ == num_cols_) - || (transA == kTrans && transB == kNoTrans && A.num_rows_ == B.num_rows_ && A.num_cols_ == num_rows_ && B.num_cols_ == num_cols_) - || (transA == kNoTrans && transB == kTrans && A.num_cols_ == B.num_cols_ && A.num_rows_ == num_rows_ && B.num_rows_ == num_cols_) - || (transA == kTrans && transB == kTrans && A.num_rows_ == B.num_cols_ && A.num_cols_ == num_rows_ && B.num_rows_ == num_cols_)); + KALDI_ASSERT((transA == kNoTrans && transB == kNoTrans && A.num_cols_ == +B.num_rows_ && A.num_rows_ == num_rows_ && B.num_cols_ == num_cols_) + || (transA == kTrans && transB == kNoTrans && A.num_rows_ == +B.num_rows_ && A.num_cols_ == num_rows_ && B.num_cols_ == num_cols_) + || (transA == kNoTrans && transB == kTrans && A.num_cols_ == +B.num_cols_ && A.num_rows_ == num_rows_ && B.num_rows_ == num_cols_) + || (transA == kTrans && transB == kTrans && A.num_rows_ == +B.num_cols_ && A.num_cols_ == num_rows_ && B.num_rows_ == num_cols_)); KALDI_ASSERT(&A != this && &B != this); MatrixIndexT Astride = A.stride_, Bstride = B.stride_, stride = this->stride_, @@ -342,7 +356,8 @@ void MatrixBase::AddSpSp(const Real alpha, const SpMatrix &A_in, // fully (to save work, we used the matrix constructor from SpMatrix). // CblasLeft means A is on the left: C <-- alpha A B + beta C if (sz == 0) return; - cblas_Xsymm(alpha, sz, A.data_, A.stride_, B.data_, B.stride_, beta, data_, stride_); + cblas_Xsymm(alpha, sz, A.data_, A.stride_, B.data_, B.stride_, beta, data_, +stride_); } template @@ -352,13 +367,15 @@ void MatrixBase::AddMat(const Real alpha, const MatrixBase& A, if (transA == kNoTrans) { Scale(alpha + 1.0); } else { - KALDI_ASSERT(num_rows_ == num_cols_ && "AddMat: adding to self (transposed): not symmetric."); + KALDI_ASSERT(num_rows_ == num_cols_ && "AddMat: adding to self +(transposed): not symmetric."); Real *data = data_; if (alpha == 1.0) { // common case-- handle separately. for (MatrixIndexT row = 0; row < num_rows_; row++) { for (MatrixIndexT col = 0; col < row; col++) { Real *lower = data + (row * stride_) + col, *upper = data + (col - * stride_) + row; + * +stride_) + row; Real sum = *lower + *upper; *lower = *upper = sum; } @@ -368,7 +385,8 @@ void MatrixBase::AddMat(const Real alpha, const MatrixBase& A, for (MatrixIndexT row = 0; row < num_rows_; row++) { for (MatrixIndexT col = 0; col < row; col++) { Real *lower = data + (row * stride_) + col, *upper = data + (col - * stride_) + row; + * +stride_) + row; Real lower_tmp = *lower; *lower += alpha * *upper; *upper += alpha * lower_tmp; @@ -390,7 +408,8 @@ void MatrixBase::AddMat(const Real alpha, const MatrixBase& A, } else { KALDI_ASSERT(A.num_cols_ == num_rows_ && A.num_rows_ == num_cols_); if (num_rows_ == 0) return; - for (MatrixIndexT row = 0; row < num_rows_; row++, adata++, data += stride) + for (MatrixIndexT row = 0; row < num_rows_; row++, adata++, data += +stride) cblas_Xaxpy(num_cols_, alpha, adata, aStride, data, 1); } } @@ -503,7 +522,8 @@ void MatrixBase::AddMatSmat(Real alpha, const MatrixBase &A, Real alpha_B_kj = alpha * p.second; Real *this_col_j = this->Data() + j; // Add to entire 'j'th column of *this at once using cblas_Xaxpy. - // pass stride to write a colmun as matrices are stored in row major order. + // pass stride to write a colmun as matrices are stored in row major +order. cblas_Xaxpy(this_num_rows, alpha_B_kj, a_col_k, A.stride_, this_col_j, this->stride_); //for (MatrixIndexT i = 0; i < this_num_rows; ++i) @@ -529,10 +549,11 @@ void MatrixBase::AddMatSmat(Real alpha, const MatrixBase &A, Real alpha_B_jk = alpha * p.second; const Real *a_col_k = A.Data() + k; // Add to entire 'j'th column of *this at once using cblas_Xaxpy. - // pass stride to write a column as matrices are stored in row major order. + // pass stride to write a column as matrices are stored in row major +order. cblas_Xaxpy(this_num_rows, alpha_B_jk, a_col_k, A.stride_, this_col_j, this->stride_); - //for (MatrixIndexT i = 0; i < this_num_rows; ++i) + //for (MatrixIndexT i = 0; i < this_num_rows; ++i) // this_col_j[i*this->stride_] += alpha_B_jk * a_col_k[i*A.stride_]; } } @@ -586,7 +607,8 @@ void MatrixBase::AddDiagVecMat( Real *data = data_; const Real *Mdata = M.Data(), *vdata = v.Data(); if (num_rows_ == 0) return; - for (MatrixIndexT i = 0; i < num_rows; i++, data += stride, Mdata += M_row_stride, vdata++) + for (MatrixIndexT i = 0; i < num_rows; i++, data += stride, Mdata += +M_row_stride, vdata++) cblas_Xaxpy(num_cols, alpha * *vdata, Mdata, M_col_stride, data, 1); } @@ -620,7 +642,8 @@ void MatrixBase::AddMatDiagVec( if (num_rows_ == 0) return; for (MatrixIndexT i = 0; i < num_rows; i++){ for(MatrixIndexT j = 0; j < num_cols; j ++ ){ - data[i*stride + j] += alpha * vdata[j] * Mdata[i*M_row_stride + j*M_col_stride]; + data[i*stride + j] += alpha * vdata[j] * Mdata[i*M_row_stride + +j*M_col_stride]; } } } @@ -655,8 +678,10 @@ void MatrixBase::LapackGesvd(VectorBase *s, MatrixBase *U_in, KALDI_ASSERT(s != NULL && U_in != this && V_in != this); Matrix tmpU, tmpV; - if (U_in == NULL) tmpU.Resize(this->num_rows_, 1); // work-space if U_in empty. - if (V_in == NULL) tmpV.Resize(1, this->num_cols_); // work-space if V_in empty. + if (U_in == NULL) tmpU.Resize(this->num_rows_, 1); // work-space if U_in +empty. + if (V_in == NULL) tmpV.Resize(1, this->num_cols_); // work-space if V_in +empty. /// Impementation notes: /// Lapack works in column-order, therefore the dimensions of *this are @@ -690,8 +715,10 @@ void MatrixBase::LapackGesvd(VectorBase *s, MatrixBase *U_in, KaldiBlasInt result; // query for work space - char *u_job = const_cast(U_in ? "s" : "N"); // "s" == skinny, "N" == "none." - char *v_job = const_cast(V_in ? "s" : "N"); // "s" == skinny, "N" == "none." + char *u_job = const_cast(U_in ? "s" : "N"); // "s" == skinny, "N" == +"none." + char *v_job = const_cast(V_in ? "s" : "N"); // "s" == skinny, "N" == +"none." clapack_Xgesvd(v_job, u_job, &M, &N, data_, &LDA, s->Data(), @@ -700,7 +727,8 @@ void MatrixBase::LapackGesvd(VectorBase *s, MatrixBase *U_in, &work_query, &l_work, &result); - KALDI_ASSERT(result >= 0 && "Call to CLAPACK dgesvd_ called with wrong arguments"); + KALDI_ASSERT(result >= 0 && "Call to CLAPACK dgesvd_ called with wrong +arguments"); l_work = static_cast(work_query); Real *p_work; @@ -718,7 +746,8 @@ void MatrixBase::LapackGesvd(VectorBase *s, MatrixBase *U_in, p_work, &l_work, &result); - KALDI_ASSERT(result >= 0 && "Call to CLAPACK dgesvd_ called with wrong arguments"); + KALDI_ASSERT(result >= 0 && "Call to CLAPACK dgesvd_ called with wrong +arguments"); if (result != 0) { KALDI_WARN << "CLAPACK sgesvd_ : some weird convergence not satisfied"; @@ -729,167 +758,166 @@ void MatrixBase::LapackGesvd(VectorBase *s, MatrixBase *U_in, #endif */ // Copy constructor. Copies data to newly allocated memory. -template -Matrix::Matrix (const MatrixBase & M, - MatrixTransposeType trans/*=kNoTrans*/) +template +Matrix::Matrix(const MatrixBase &M, + MatrixTransposeType trans /*=kNoTrans*/) : MatrixBase() { - if (trans == kNoTrans) { - Resize(M.num_rows_, M.num_cols_); - this->CopyFromMat(M); - } else { - Resize(M.num_cols_, M.num_rows_); - this->CopyFromMat(M, kTrans); - } + if (trans == kNoTrans) { + Resize(M.num_rows_, M.num_cols_); + this->CopyFromMat(M); + } else { + Resize(M.num_cols_, M.num_rows_); + this->CopyFromMat(M, kTrans); + } } // Copy constructor. Copies data to newly allocated memory. -template -Matrix::Matrix (const Matrix & M): - MatrixBase() { - Resize(M.num_rows_, M.num_cols_); - this->CopyFromMat(M); +template +Matrix::Matrix(const Matrix &M) : MatrixBase() { + Resize(M.num_rows_, M.num_cols_); + this->CopyFromMat(M); } /// Copy constructor from another type. -template -template -Matrix::Matrix(const MatrixBase & M, - MatrixTransposeType trans) : MatrixBase() { - if (trans == kNoTrans) { - Resize(M.NumRows(), M.NumCols()); - this->CopyFromMat(M); - } else { - Resize(M.NumCols(), M.NumRows()); - this->CopyFromMat(M, kTrans); - } +template +template +Matrix::Matrix(const MatrixBase &M, MatrixTransposeType trans) + : MatrixBase() { + if (trans == kNoTrans) { + Resize(M.NumRows(), M.NumCols()); + this->CopyFromMat(M); + } else { + Resize(M.NumCols(), M.NumRows()); + this->CopyFromMat(M, kTrans); + } } // Instantiate this constructor for float->double and double->float. -template -Matrix::Matrix(const MatrixBase & M, - MatrixTransposeType trans); -template -Matrix::Matrix(const MatrixBase & M, - MatrixTransposeType trans); +template Matrix::Matrix(const MatrixBase &M, + MatrixTransposeType trans); +template Matrix::Matrix(const MatrixBase &M, + MatrixTransposeType trans); -template +template inline void Matrix::Init(const MatrixIndexT rows, const MatrixIndexT cols, const MatrixStrideType stride_type) { - if (rows * cols == 0) { - KALDI_ASSERT(rows == 0 && cols == 0); - this->num_rows_ = 0; - this->num_cols_ = 0; - this->stride_ = 0; - this->data_ = NULL; - return; - } - KALDI_ASSERT(rows > 0 && cols > 0); - MatrixIndexT skip, stride; - size_t size; - void *data; // aligned memory block - void *temp; // memory block to be really freed - - // compute the size of skip and real cols - skip = ((16 / sizeof(Real)) - cols % (16 / sizeof(Real))) - % (16 / sizeof(Real)); - stride = cols + skip; - size = static_cast(rows) * static_cast(stride) - * sizeof(Real); - - // allocate the memory and set the right dimensions and parameters - if (NULL != (data = KALDI_MEMALIGN(16, size, &temp))) { - MatrixBase::data_ = static_cast (data); - MatrixBase::num_rows_ = rows; - MatrixBase::num_cols_ = cols; - MatrixBase::stride_ = (stride_type == kDefaultStride ? stride : cols); - } else { - throw std::bad_alloc(); - } + if (rows * cols == 0) { + KALDI_ASSERT(rows == 0 && cols == 0); + this->num_rows_ = 0; + this->num_cols_ = 0; + this->stride_ = 0; + this->data_ = NULL; + return; + } + KALDI_ASSERT(rows > 0 && cols > 0); + MatrixIndexT skip, stride; + size_t size; + void *data; // aligned memory block + void *temp; // memory block to be really freed + + // compute the size of skip and real cols + skip = ((16 / sizeof(Real)) - cols % (16 / sizeof(Real))) % + (16 / sizeof(Real)); + stride = cols + skip; + size = + static_cast(rows) * static_cast(stride) * sizeof(Real); + + // allocate the memory and set the right dimensions and parameters + if (NULL != (data = KALDI_MEMALIGN(16, size, &temp))) { + MatrixBase::data_ = static_cast(data); + MatrixBase::num_rows_ = rows; + MatrixBase::num_cols_ = cols; + MatrixBase::stride_ = + (stride_type == kDefaultStride ? stride : cols); + } else { + throw std::bad_alloc(); + } } -template +template void Matrix::Resize(const MatrixIndexT rows, const MatrixIndexT cols, MatrixResizeType resize_type, MatrixStrideType stride_type) { - // the next block uses recursion to handle what we have to do if - // resize_type == kCopyData. - if (resize_type == kCopyData) { - if (this->data_ == NULL || rows == 0) resize_type = kSetZero; // nothing to copy. - else if (rows == this->num_rows_ && cols == this->num_cols_ && - (stride_type == kDefaultStride || this->stride_ == this->num_cols_)) { return; } // nothing to do. - else { - // set tmp to a matrix of the desired size; if new matrix - // is bigger in some dimension, zero it. - MatrixResizeType new_resize_type = - (rows > this->num_rows_ || cols > this->num_cols_) ? kSetZero : kUndefined; - Matrix tmp(rows, cols, new_resize_type, stride_type); - MatrixIndexT rows_min = std::min(rows, this->num_rows_), - cols_min = std::min(cols, this->num_cols_); - tmp.Range(0, rows_min, 0, cols_min). - CopyFromMat(this->Range(0, rows_min, 0, cols_min)); - tmp.Swap(this); - // and now let tmp go out of scope, deleting what was in *this. - return; + // the next block uses recursion to handle what we have to do if + // resize_type == kCopyData. + if (resize_type == kCopyData) { + if (this->data_ == NULL || rows == 0) + resize_type = kSetZero; // nothing to copy. + else if (rows == this->num_rows_ && cols == this->num_cols_ && + (stride_type == kDefaultStride || + this->stride_ == this->num_cols_)) { + return; + } // nothing to do. + else { + // set tmp to a matrix of the desired size; if new matrix + // is bigger in some dimension, zero it. + MatrixResizeType new_resize_type = + (rows > this->num_rows_ || cols > this->num_cols_) ? kSetZero + : kUndefined; + Matrix tmp(rows, cols, new_resize_type, stride_type); + MatrixIndexT rows_min = std::min(rows, this->num_rows_), + cols_min = std::min(cols, this->num_cols_); + tmp.Range(0, rows_min, 0, cols_min) + .CopyFromMat(this->Range(0, rows_min, 0, cols_min)); + tmp.Swap(this); + // and now let tmp go out of scope, deleting what was in *this. + return; + } } - } - // At this point, resize_type == kSetZero or kUndefined. + // At this point, resize_type == kSetZero or kUndefined. - if (MatrixBase::data_ != NULL) { - if (rows == MatrixBase::num_rows_ - && cols == MatrixBase::num_cols_) { - if (resize_type == kSetZero) - this->SetZero(); - return; + if (MatrixBase::data_ != NULL) { + if (rows == MatrixBase::num_rows_ && + cols == MatrixBase::num_cols_) { + if (resize_type == kSetZero) this->SetZero(); + return; + } else + Destroy(); } - else - Destroy(); - } - Init(rows, cols, stride_type); - if (resize_type == kSetZero) MatrixBase::SetZero(); + Init(rows, cols, stride_type); + if (resize_type == kSetZero) MatrixBase::SetZero(); } -template -template +template +template void MatrixBase::CopyFromMat(const MatrixBase &M, MatrixTransposeType Trans) { - if (sizeof(Real) == sizeof(OtherReal) && - static_cast(M.Data()) == - static_cast(this->Data())) { - // CopyFromMat called on same data. Nothing to do (except sanity checks). - KALDI_ASSERT(Trans == kNoTrans && M.NumRows() == NumRows() && - M.NumCols() == NumCols() && M.Stride() == Stride()); - return; - } - if (Trans == kNoTrans) { - KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == M.NumCols()); - for (MatrixIndexT i = 0; i < num_rows_; i++) - (*this).Row(i).CopyFromVec(M.Row(i)); - } else { - KALDI_ASSERT(num_cols_ == M.NumRows() && num_rows_ == M.NumCols()); - int32 this_stride = stride_, other_stride = M.Stride(); - Real *this_data = data_; - const OtherReal *other_data = M.Data(); - for (MatrixIndexT i = 0; i < num_rows_; i++) - for (MatrixIndexT j = 0; j < num_cols_; j++) - this_data[i * this_stride + j] = other_data[j * other_stride + i]; - } + if (sizeof(Real) == sizeof(OtherReal) && + static_cast(M.Data()) == + static_cast(this->Data())) { + // CopyFromMat called on same data. Nothing to do (except sanity + // checks). + KALDI_ASSERT(Trans == kNoTrans && M.NumRows() == NumRows() && + M.NumCols() == NumCols() && M.Stride() == Stride()); + return; + } + if (Trans == kNoTrans) { + KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == M.NumCols()); + for (MatrixIndexT i = 0; i < num_rows_; i++) + (*this).Row(i).CopyFromVec(M.Row(i)); + } else { + KALDI_ASSERT(num_cols_ == M.NumRows() && num_rows_ == M.NumCols()); + int32 this_stride = stride_, other_stride = M.Stride(); + Real *this_data = data_; + const OtherReal *other_data = M.Data(); + for (MatrixIndexT i = 0; i < num_rows_; i++) + for (MatrixIndexT j = 0; j < num_cols_; j++) + this_data[i * this_stride + j] = + other_data[j * other_stride + i]; + } } // template instantiations. -template -void MatrixBase::CopyFromMat(const MatrixBase & M, - MatrixTransposeType Trans); -template -void MatrixBase::CopyFromMat(const MatrixBase & M, - MatrixTransposeType Trans); -template -void MatrixBase::CopyFromMat(const MatrixBase & M, - MatrixTransposeType Trans); -template -void MatrixBase::CopyFromMat(const MatrixBase & M, - MatrixTransposeType Trans); +template void MatrixBase::CopyFromMat(const MatrixBase &M, + MatrixTransposeType Trans); +template void MatrixBase::CopyFromMat(const MatrixBase &M, + MatrixTransposeType Trans); +template void MatrixBase::CopyFromMat(const MatrixBase &M, + MatrixTransposeType Trans); +template void MatrixBase::CopyFromMat(const MatrixBase &M, + MatrixTransposeType Trans); /* // Specialize the template for CopyFromSp for float, float. @@ -987,99 +1015,97 @@ void MatrixBase::CopyFromTp(const TpMatrix & M, MatrixTransposeType trans); */ -template +template void MatrixBase::CopyRowsFromVec(const VectorBase &rv) { - if (rv.Dim() == num_rows_*num_cols_) { - if (stride_ == num_cols_) { - // one big copy operation. - const Real *rv_data = rv.Data(); - std::memcpy(data_, rv_data, sizeof(Real)*num_rows_*num_cols_); - } else { - const Real *rv_data = rv.Data(); - for (MatrixIndexT r = 0; r < num_rows_; r++) { - Real *row_data = RowData(r); - for (MatrixIndexT c = 0; c < num_cols_; c++) { - row_data[c] = rv_data[c]; + if (rv.Dim() == num_rows_ * num_cols_) { + if (stride_ == num_cols_) { + // one big copy operation. + const Real *rv_data = rv.Data(); + std::memcpy(data_, rv_data, sizeof(Real) * num_rows_ * num_cols_); + } else { + const Real *rv_data = rv.Data(); + for (MatrixIndexT r = 0; r < num_rows_; r++) { + Real *row_data = RowData(r); + for (MatrixIndexT c = 0; c < num_cols_; c++) { + row_data[c] = rv_data[c]; + } + rv_data += num_cols_; + } } - rv_data += num_cols_; - } + } else if (rv.Dim() == num_cols_) { + const Real *rv_data = rv.Data(); + for (MatrixIndexT r = 0; r < num_rows_; r++) + std::memcpy(RowData(r), rv_data, sizeof(Real) * num_cols_); + } else { + KALDI_ERR << "Wrong sized arguments"; } - } else if (rv.Dim() == num_cols_) { - const Real *rv_data = rv.Data(); - for (MatrixIndexT r = 0; r < num_rows_; r++) - std::memcpy(RowData(r), rv_data, sizeof(Real)*num_cols_); - } else { - KALDI_ERR << "Wrong sized arguments"; - } } -template -template +template +template void MatrixBase::CopyRowsFromVec(const VectorBase &rv) { - if (rv.Dim() == num_rows_*num_cols_) { - const OtherReal *rv_data = rv.Data(); - for (MatrixIndexT r = 0; r < num_rows_; r++) { - Real *row_data = RowData(r); - for (MatrixIndexT c = 0; c < num_cols_; c++) { - row_data[c] = static_cast(rv_data[c]); - } - rv_data += num_cols_; + if (rv.Dim() == num_rows_ * num_cols_) { + const OtherReal *rv_data = rv.Data(); + for (MatrixIndexT r = 0; r < num_rows_; r++) { + Real *row_data = RowData(r); + for (MatrixIndexT c = 0; c < num_cols_; c++) { + row_data[c] = static_cast(rv_data[c]); + } + rv_data += num_cols_; + } + } else if (rv.Dim() == num_cols_) { + const OtherReal *rv_data = rv.Data(); + Real *first_row_data = RowData(0); + for (MatrixIndexT c = 0; c < num_cols_; c++) + first_row_data[c] = rv_data[c]; + for (MatrixIndexT r = 1; r < num_rows_; r++) + std::memcpy(RowData(r), first_row_data, sizeof(Real) * num_cols_); + } else { + KALDI_ERR << "Wrong sized arguments."; } - } else if (rv.Dim() == num_cols_) { - const OtherReal *rv_data = rv.Data(); - Real *first_row_data = RowData(0); - for (MatrixIndexT c = 0; c < num_cols_; c++) - first_row_data[c] = rv_data[c]; - for (MatrixIndexT r = 1; r < num_rows_; r++) - std::memcpy(RowData(r), first_row_data, sizeof(Real)*num_cols_); - } else { - KALDI_ERR << "Wrong sized arguments."; - } } -template -void MatrixBase::CopyRowsFromVec(const VectorBase &rv); -template -void MatrixBase::CopyRowsFromVec(const VectorBase &rv); +template void MatrixBase::CopyRowsFromVec(const VectorBase &rv); +template void MatrixBase::CopyRowsFromVec(const VectorBase &rv); -template +template void MatrixBase::CopyColsFromVec(const VectorBase &rv) { - if (rv.Dim() == num_rows_*num_cols_) { - const Real *v_inc_data = rv.Data(); - Real *m_inc_data = data_; + if (rv.Dim() == num_rows_ * num_cols_) { + const Real *v_inc_data = rv.Data(); + Real *m_inc_data = data_; - for (MatrixIndexT c = 0; c < num_cols_; c++) { - for (MatrixIndexT r = 0; r < num_rows_; r++) { - m_inc_data[r * stride_] = v_inc_data[r]; - } - v_inc_data += num_rows_; - m_inc_data ++; - } - } else if (rv.Dim() == num_rows_) { - const Real *v_inc_data = rv.Data(); - Real *m_inc_data = data_; - for (MatrixIndexT r = 0; r < num_rows_; r++) { - Real value = *(v_inc_data++); - for (MatrixIndexT c = 0; c < num_cols_; c++) - m_inc_data[c] = value; - m_inc_data += stride_; + for (MatrixIndexT c = 0; c < num_cols_; c++) { + for (MatrixIndexT r = 0; r < num_rows_; r++) { + m_inc_data[r * stride_] = v_inc_data[r]; + } + v_inc_data += num_rows_; + m_inc_data++; + } + } else if (rv.Dim() == num_rows_) { + const Real *v_inc_data = rv.Data(); + Real *m_inc_data = data_; + for (MatrixIndexT r = 0; r < num_rows_; r++) { + Real value = *(v_inc_data++); + for (MatrixIndexT c = 0; c < num_cols_; c++) m_inc_data[c] = value; + m_inc_data += stride_; + } + } else { + KALDI_ERR << "Wrong size of arguments."; } - } else { - KALDI_ERR << "Wrong size of arguments."; - } } -template -void MatrixBase::CopyRowFromVec(const VectorBase &rv, const MatrixIndexT row) { - KALDI_ASSERT(rv.Dim() == num_cols_ && - static_cast(row) < - static_cast(num_rows_)); +template +void MatrixBase::CopyRowFromVec(const VectorBase &rv, + const MatrixIndexT row) { + KALDI_ASSERT(rv.Dim() == num_cols_ && + static_cast(row) < + static_cast(num_rows_)); - const Real *rv_data = rv.Data(); - Real *row_data = RowData(row); + const Real *rv_data = rv.Data(); + Real *row_data = RowData(row); - std::memcpy(row_data, rv_data, num_cols_ * sizeof(Real)); + std::memcpy(row_data, rv_data, num_cols_ * sizeof(Real)); } /* template @@ -1091,40 +1117,40 @@ void MatrixBase::CopyDiagFromVec(const VectorBase &rv) { *my_data = *rv_data; }*/ -template +template void MatrixBase::CopyColFromVec(const VectorBase &rv, const MatrixIndexT col) { - KALDI_ASSERT(rv.Dim() == num_rows_ && - static_cast(col) < - static_cast(num_cols_)); + KALDI_ASSERT(rv.Dim() == num_rows_ && + static_cast(col) < + static_cast(num_cols_)); - const Real *rv_data = rv.Data(); - Real *col_data = data_ + col; + const Real *rv_data = rv.Data(); + Real *col_data = data_ + col; - for (MatrixIndexT r = 0; r < num_rows_; r++) - col_data[r * stride_] = rv_data[r]; + for (MatrixIndexT r = 0; r < num_rows_; r++) + col_data[r * stride_] = rv_data[r]; } - -template +template void Matrix::RemoveRow(MatrixIndexT i) { - KALDI_ASSERT(static_cast(i) < - static_cast(MatrixBase::num_rows_) - && "Access out of matrix"); - for (MatrixIndexT j = i + 1; j < MatrixBase::num_rows_; j++) - MatrixBase::Row(j-1).CopyFromVec( MatrixBase::Row(j)); - MatrixBase::num_rows_--; + KALDI_ASSERT( + static_cast(i) < + static_cast(MatrixBase::num_rows_) && + "Access out of matrix"); + for (MatrixIndexT j = i + 1; j < MatrixBase::num_rows_; j++) + MatrixBase::Row(j - 1).CopyFromVec(MatrixBase::Row(j)); + MatrixBase::num_rows_--; } -template +template void Matrix::Destroy() { - // we need to free the data block if it was defined - if (NULL != MatrixBase::data_) - KALDI_MEMALIGN_FREE( MatrixBase::data_); - MatrixBase::data_ = NULL; - MatrixBase::num_rows_ = MatrixBase::num_cols_ - = MatrixBase::stride_ = 0; + // we need to free the data block if it was defined + if (NULL != MatrixBase::data_) + KALDI_MEMALIGN_FREE(MatrixBase::data_); + MatrixBase::data_ = NULL; + MatrixBase::num_rows_ = MatrixBase::num_cols_ = + MatrixBase::stride_ = 0; } @@ -1248,7 +1274,8 @@ template void MatrixBase::GroupPnormDeriv(const MatrixBase &input, const MatrixBase &output, Real power) { - KALDI_ASSERT(input.NumCols() == this->NumCols() && input.NumRows() == this->NumRows()); + KALDI_ASSERT(input.NumCols() == this->NumCols() && input.NumRows() == +this->NumRows()); KALDI_ASSERT(this->NumCols() % output.NumCols() == 0 && this->NumRows() == output.NumRows()); @@ -1320,22 +1347,22 @@ void MatrixBase::MulColsVec(const VectorBase &scale) { } */ -template +template void MatrixBase::SetZero() { - if (num_cols_ == stride_) - memset(data_, 0, sizeof(Real)*num_rows_*num_cols_); - else - for (MatrixIndexT row = 0; row < num_rows_; row++) - memset(data_ + row*stride_, 0, sizeof(Real)*num_cols_); + if (num_cols_ == stride_) + memset(data_, 0, sizeof(Real) * num_rows_ * num_cols_); + else + for (MatrixIndexT row = 0; row < num_rows_; row++) + memset(data_ + row * stride_, 0, sizeof(Real) * num_cols_); } -template +template void MatrixBase::Set(Real value) { - for (MatrixIndexT row = 0; row < num_rows_; row++) { - for (MatrixIndexT col = 0; col < num_cols_; col++) { - (*this)(row, col) = value; + for (MatrixIndexT row = 0; row < num_rows_; row++) { + for (MatrixIndexT col = 0; col < num_cols_; col++) { + (*this)(row, col) = value; + } } - } } /* @@ -1355,7 +1382,8 @@ void MatrixBase::SetRandn() { for (MatrixIndexT col = 0; col < nc; col += 2) { kaldi::RandGauss2(row_data + col, row_data + col + 1, &rstate); } - if (nc != num_cols_) row_data[nc] = static_cast(kaldi::RandGauss(&rstate)); + if (nc != num_cols_) row_data[nc] = +static_cast(kaldi::RandGauss(&rstate)); } } @@ -1371,273 +1399,302 @@ void MatrixBase::SetRandUniform() { } */ -template +template void MatrixBase::Write(std::ostream &os, bool binary) const { - if (!os.good()) { - KALDI_ERR << "Failed to write matrix to stream: stream not good"; - } - if (binary) { // Use separate binary and text formats, - // since in binary mode we need to know if it's float or double. - std::string my_token = (sizeof(Real) == 4 ? "FM" : "DM"); - - WriteToken(os, binary, my_token); - { - int32 rows = this->num_rows_; // make the size 32-bit on disk. - int32 cols = this->num_cols_; - KALDI_ASSERT(this->num_rows_ == (MatrixIndexT) rows); - KALDI_ASSERT(this->num_cols_ == (MatrixIndexT) cols); - WriteBasicType(os, binary, rows); - WriteBasicType(os, binary, cols); - } - if (Stride() == NumCols()) - os.write(reinterpret_cast (Data()), sizeof(Real) - * static_cast(num_rows_) * static_cast(num_cols_)); - else - for (MatrixIndexT i = 0; i < num_rows_; i++) - os.write(reinterpret_cast (RowData(i)), sizeof(Real) - * num_cols_); if (!os.good()) { - KALDI_ERR << "Failed to write matrix to stream"; - } - } else { // text mode. - if (num_cols_ == 0) { - os << " [ ]\n"; - } else { - os << " ["; - for (MatrixIndexT i = 0; i < num_rows_; i++) { - os << "\n "; - for (MatrixIndexT j = 0; j < num_cols_; j++) - os << (*this)(i, j) << " "; - } - os << "]\n"; + KALDI_ERR << "Failed to write matrix to stream: stream not good"; + } + if (binary) { // Use separate binary and text formats, + // since in binary mode we need to know if it's float or double. + std::string my_token = (sizeof(Real) == 4 ? "FM" : "DM"); + + WriteToken(os, binary, my_token); + { + int32 rows = this->num_rows_; // make the size 32-bit on disk. + int32 cols = this->num_cols_; + KALDI_ASSERT(this->num_rows_ == (MatrixIndexT)rows); + KALDI_ASSERT(this->num_cols_ == (MatrixIndexT)cols); + WriteBasicType(os, binary, rows); + WriteBasicType(os, binary, cols); + } + if (Stride() == NumCols()) + os.write(reinterpret_cast(Data()), + sizeof(Real) * static_cast(num_rows_) * + static_cast(num_cols_)); + else + for (MatrixIndexT i = 0; i < num_rows_; i++) + os.write(reinterpret_cast(RowData(i)), + sizeof(Real) * num_cols_); + if (!os.good()) { + KALDI_ERR << "Failed to write matrix to stream"; + } + } else { // text mode. + if (num_cols_ == 0) { + os << " [ ]\n"; + } else { + os << " ["; + for (MatrixIndexT i = 0; i < num_rows_; i++) { + os << "\n "; + for (MatrixIndexT j = 0; j < num_cols_; j++) + os << (*this)(i, j) << " "; + } + os << "]\n"; + } } - } } -template -void MatrixBase::Read(std::istream & is, bool binary) { - // In order to avoid rewriting this, we just declare a Matrix and - // use it to read the data, then copy. - Matrix tmp; - tmp.Read(is, binary); - if (tmp.NumRows() != NumRows() || tmp.NumCols() != NumCols()) { - KALDI_ERR << "MatrixBase::Read, size mismatch " - << NumRows() << " x " << NumCols() << " versus " - << tmp.NumRows() << " x " << tmp.NumCols(); - } - CopyFromMat(tmp); +template +void MatrixBase::Read(std::istream &is, bool binary) { + // In order to avoid rewriting this, we just declare a Matrix and + // use it to read the data, then copy. + Matrix tmp; + tmp.Read(is, binary); + if (tmp.NumRows() != NumRows() || tmp.NumCols() != NumCols()) { + KALDI_ERR << "MatrixBase::Read, size mismatch " << NumRows() + << " x " << NumCols() << " versus " << tmp.NumRows() << " x " + << tmp.NumCols(); + } + CopyFromMat(tmp); } -template -void Matrix::Read(std::istream & is, bool binary) { - // now assume add == false. - MatrixIndexT pos_at_start = is.tellg(); - std::ostringstream specific_error; - - if (binary) { // Read in binary mode. - int peekval = Peek(is, binary); - if (peekval == 'C') { - // This code enables us to read CompressedMatrix as a regular matrix. - //CompressedMatrix compressed_mat; - //compressed_mat.Read(is, binary); // at this point, add == false. - //this->Resize(compressed_mat.NumRows(), compressed_mat.NumCols()); - //compressed_mat.CopyToMat(this); - return; - } - const char *my_token = (sizeof(Real) == 4 ? "FM" : "DM"); - char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F'); - if (peekval == other_token_start) { // need to instantiate the other type to read it. - typedef typename OtherReal::Real OtherType; // if Real == float, OtherType == double, and vice versa. - Matrix other(this->num_rows_, this->num_cols_); - other.Read(is, binary); // add is false at this point anyway. - this->Resize(other.NumRows(), other.NumCols()); - this->CopyFromMat(other); - return; - } - std::string token; - ReadToken(is, binary, &token); - if (token != my_token) { - if (token.length() > 20) token = token.substr(0, 17) + "..."; - specific_error << ": Expected token " << my_token << ", got " << token; - goto bad; - } - int32 rows, cols; - ReadBasicType(is, binary, &rows); // throws on error. - ReadBasicType(is, binary, &cols); // throws on error. - if ((MatrixIndexT)rows != this->num_rows_ || (MatrixIndexT)cols != this->num_cols_) { - this->Resize(rows, cols); - } - if (this->Stride() == this->NumCols() && rows*cols!=0) { - is.read(reinterpret_cast(this->Data()), - sizeof(Real)*rows*cols); - if (is.fail()) goto bad; - } else { - for (MatrixIndexT i = 0; i < (MatrixIndexT)rows; i++) { - is.read(reinterpret_cast(this->RowData(i)), sizeof(Real)*cols); - if (is.fail()) goto bad; - } - } - if (is.eof()) return; - if (is.fail()) goto bad; - return; - } else { // Text mode. - std::string str; - is >> str; // get a token - if (is.fail()) { specific_error << ": Expected \"[\", got EOF"; goto bad; } - // if ((str.compare("DM") == 0) || (str.compare("FM") == 0)) { // Back compatibility. - // is >> str; // get #rows - // is >> str; // get #cols - // is >> str; // get "[" - // } - if (str == "[]") { Resize(0, 0); return; } // Be tolerant of variants. - else if (str != "[") { - if (str.length() > 20) str = str.substr(0, 17) + "..."; - specific_error << ": Expected \"[\", got \"" << str << '"'; - goto bad; - } - // At this point, we have read "[". - std::vector* > data; - std::vector *cur_row = new std::vector; - while (1) { - int i = is.peek(); - if (i == -1) { specific_error << "Got EOF while reading matrix data"; goto cleanup; } - else if (static_cast(i) == ']') { // Finished reading matrix. - is.get(); // eat the "]". - i = is.peek(); - if (static_cast(i) == '\r') { - is.get(); - is.get(); // get \r\n (must eat what we wrote) - } else if (static_cast(i) == '\n') { is.get(); } // get \n (must eat what we wrote) - if (is.fail()) { - KALDI_WARN << "After end of matrix data, read error."; - // we got the data we needed, so just warn for this error. +template +void Matrix::Read(std::istream &is, bool binary) { + // now assume add == false. + MatrixIndexT pos_at_start = is.tellg(); + std::ostringstream specific_error; + + if (binary) { // Read in binary mode. + int peekval = Peek(is, binary); + if (peekval == 'C') { + // This code enables us to read CompressedMatrix as a regular + // matrix. + // CompressedMatrix compressed_mat; + // compressed_mat.Read(is, binary); // at this point, add == false. + // this->Resize(compressed_mat.NumRows(), compressed_mat.NumCols()); + // compressed_mat.CopyToMat(this); + return; } - // Now process the data. - if (!cur_row->empty()) data.push_back(cur_row); - else delete(cur_row); - cur_row = NULL; - if (data.empty()) { this->Resize(0, 0); return; } - else { - int32 num_rows = data.size(), num_cols = data[0]->size(); - this->Resize(num_rows, num_cols); - for (int32 i = 0; i < num_rows; i++) { - if (static_cast(data[i]->size()) != num_cols) { - specific_error << "Matrix has inconsistent #cols: " << num_cols - << " vs." << data[i]->size() << " (processing row" - << i << ")"; - goto cleanup; + const char *my_token = (sizeof(Real) == 4 ? "FM" : "DM"); + char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F'); + if (peekval == other_token_start) { // need to instantiate the other + // type to read it. + typedef typename OtherReal::Real OtherType; // if Real == + // float, + // OtherType == + // double, and + // vice versa. + Matrix other(this->num_rows_, this->num_cols_); + other.Read(is, binary); // add is false at this point anyway. + this->Resize(other.NumRows(), other.NumCols()); + this->CopyFromMat(other); + return; + } + std::string token; + ReadToken(is, binary, &token); + if (token != my_token) { + if (token.length() > 20) token = token.substr(0, 17) + "..."; + specific_error << ": Expected token " << my_token << ", got " + << token; + goto bad; + } + int32 rows, cols; + ReadBasicType(is, binary, &rows); // throws on error. + ReadBasicType(is, binary, &cols); // throws on error. + if ((MatrixIndexT)rows != this->num_rows_ || + (MatrixIndexT)cols != this->num_cols_) { + this->Resize(rows, cols); + } + if (this->Stride() == this->NumCols() && rows * cols != 0) { + is.read(reinterpret_cast(this->Data()), + sizeof(Real) * rows * cols); + if (is.fail()) goto bad; + } else { + for (MatrixIndexT i = 0; i < (MatrixIndexT)rows; i++) { + is.read(reinterpret_cast(this->RowData(i)), + sizeof(Real) * cols); + if (is.fail()) goto bad; } - for (int32 j = 0; j < num_cols; j++) - (*this)(i, j) = (*(data[i]))[j]; - delete data[i]; - data[i] = NULL; - } } + if (is.eof()) return; + if (is.fail()) goto bad; return; - } else if (static_cast(i) == '\n' || static_cast(i) == ';') { - // End of matrix row. - is.get(); - if (cur_row->size() != 0) { - data.push_back(cur_row); - cur_row = new std::vector; - cur_row->reserve(data.back()->size()); - } - } else if ( (i >= '0' && i <= '9') || i == '-' ) { // A number... - Real r; - is >> r; + } else { // Text mode. + std::string str; + is >> str; // get a token if (is.fail()) { - specific_error << "Stream failure/EOF while reading matrix data."; - goto cleanup; + specific_error << ": Expected \"[\", got EOF"; + goto bad; } - cur_row->push_back(r); - } else if (isspace(i)) { - is.get(); // eat the space and do nothing. - } else { // NaN or inf or error. - std::string str; - is >> str; - if (!KALDI_STRCASECMP(str.c_str(), "inf") || - !KALDI_STRCASECMP(str.c_str(), "infinity")) { - cur_row->push_back(std::numeric_limits::infinity()); - KALDI_WARN << "Reading infinite value into matrix."; - } else if (!KALDI_STRCASECMP(str.c_str(), "nan")) { - cur_row->push_back(std::numeric_limits::quiet_NaN()); - KALDI_WARN << "Reading NaN value into matrix."; - } else { - if (str.length() > 20) str = str.substr(0, 17) + "..."; - specific_error << "Expecting numeric matrix data, got " << str; - goto cleanup; + // if ((str.compare("DM") == 0) || (str.compare("FM") == 0)) { // Back + // compatibility. + // is >> str; // get #rows + // is >> str; // get #cols + // is >> str; // get "[" + // } + if (str == "[]") { + Resize(0, 0); + return; + } // Be tolerant of variants. + else if (str != "[") { + if (str.length() > 20) str = str.substr(0, 17) + "..."; + specific_error << ": Expected \"[\", got \"" << str << '"'; + goto bad; + } + // At this point, we have read "[". + std::vector *> data; + std::vector *cur_row = new std::vector; + while (1) { + int i = is.peek(); + if (i == -1) { + specific_error << "Got EOF while reading matrix data"; + goto cleanup; + } else if (static_cast(i) == + ']') { // Finished reading matrix. + is.get(); // eat the "]". + i = is.peek(); + if (static_cast(i) == '\r') { + is.get(); + is.get(); // get \r\n (must eat what we wrote) + } else if (static_cast(i) == '\n') { + is.get(); + } // get \n (must eat what we wrote) + if (is.fail()) { + KALDI_WARN << "After end of matrix data, read error."; + // we got the data we needed, so just warn for this error. + } + // Now process the data. + if (!cur_row->empty()) + data.push_back(cur_row); + else + delete (cur_row); + cur_row = NULL; + if (data.empty()) { + this->Resize(0, 0); + return; + } else { + int32 num_rows = data.size(), num_cols = data[0]->size(); + this->Resize(num_rows, num_cols); + for (int32 i = 0; i < num_rows; i++) { + if (static_cast(data[i]->size()) != num_cols) { + specific_error + << "Matrix has inconsistent #cols: " << num_cols + << " vs." << data[i]->size() + << " (processing row" << i << ")"; + goto cleanup; + } + for (int32 j = 0; j < num_cols; j++) + (*this)(i, j) = (*(data[i]))[j]; + delete data[i]; + data[i] = NULL; + } + } + return; + } else if (static_cast(i) == '\n' || + static_cast(i) == ';') { + // End of matrix row. + is.get(); + if (cur_row->size() != 0) { + data.push_back(cur_row); + cur_row = new std::vector; + cur_row->reserve(data.back()->size()); + } + } else if ((i >= '0' && i <= '9') || i == '-') { // A number... + Real r; + is >> r; + if (is.fail()) { + specific_error + << "Stream failure/EOF while reading matrix data."; + goto cleanup; + } + cur_row->push_back(r); + } else if (isspace(i)) { + is.get(); // eat the space and do nothing. + } else { // NaN or inf or error. + std::string str; + is >> str; + if (!KALDI_STRCASECMP(str.c_str(), "inf") || + !KALDI_STRCASECMP(str.c_str(), "infinity")) { + cur_row->push_back(std::numeric_limits::infinity()); + KALDI_WARN << "Reading infinite value into matrix."; + } else if (!KALDI_STRCASECMP(str.c_str(), "nan")) { + cur_row->push_back(std::numeric_limits::quiet_NaN()); + KALDI_WARN << "Reading NaN value into matrix."; + } else { + if (str.length() > 20) str = str.substr(0, 17) + "..."; + specific_error << "Expecting numeric matrix data, got " + << str; + goto cleanup; + } + } } - } - } // Note, we never leave the while () loop before this // line (we return from it.) - cleanup: // We only reach here in case of error in the while loop above. - if(cur_row != NULL) - delete cur_row; - for (size_t i = 0; i < data.size(); i++) - if(data[i] != NULL) - delete data[i]; - // and then go on to "bad" below, where we print error. - } + cleanup: // We only reach here in case of error in the while loop above. + if (cur_row != NULL) delete cur_row; + for (size_t i = 0; i < data.size(); i++) + if (data[i] != NULL) delete data[i]; + // and then go on to "bad" below, where we print error. + } bad: - KALDI_ERR << "Failed to read matrix from stream. " << specific_error.str() - << " File position at start is " - << pos_at_start << ", currently " << is.tellg(); + KALDI_ERR << "Failed to read matrix from stream. " << specific_error.str() + << " File position at start is " << pos_at_start << ", currently " + << is.tellg(); } // Constructor... note that this is not const-safe as it would // be quite complicated to implement a "const SubMatrix" class that // would not allow its contents to be changed. -template +template SubMatrix::SubMatrix(const MatrixBase &M, const MatrixIndexT ro, const MatrixIndexT r, const MatrixIndexT co, const MatrixIndexT c) { - if (r == 0 || c == 0) { - // we support the empty sub-matrix as a special case. - KALDI_ASSERT(c == 0 && r == 0); - this->data_ = NULL; - this->num_cols_ = 0; - this->num_rows_ = 0; - this->stride_ = 0; - return; - } - KALDI_ASSERT(static_cast(ro) < - static_cast(M.num_rows_) && - static_cast(co) < - static_cast(M.num_cols_) && - static_cast(r) <= - static_cast(M.num_rows_ - ro) && - static_cast(c) <= - static_cast(M.num_cols_ - co)); - // point to the begining of window - MatrixBase::num_rows_ = r; - MatrixBase::num_cols_ = c; - MatrixBase::stride_ = M.Stride(); - MatrixBase::data_ = M.Data_workaround() + - static_cast(co) + - static_cast(ro) * static_cast(M.Stride()); + if (r == 0 || c == 0) { + // we support the empty sub-matrix as a special case. + KALDI_ASSERT(c == 0 && r == 0); + this->data_ = NULL; + this->num_cols_ = 0; + this->num_rows_ = 0; + this->stride_ = 0; + return; + } + KALDI_ASSERT(static_cast(ro) < + static_cast(M.num_rows_) && + static_cast(co) < + static_cast(M.num_cols_) && + static_cast(r) <= + static_cast(M.num_rows_ - ro) && + static_cast(c) <= + static_cast(M.num_cols_ - co)); + // point to the begining of window + MatrixBase::num_rows_ = r; + MatrixBase::num_cols_ = c; + MatrixBase::stride_ = M.Stride(); + MatrixBase::data_ = + M.Data_workaround() + static_cast(co) + + static_cast(ro) * static_cast(M.Stride()); } -template +template SubMatrix::SubMatrix(Real *data, MatrixIndexT num_rows, MatrixIndexT num_cols, - MatrixIndexT stride): - MatrixBase(data, num_cols, num_rows, stride) { // caution: reversed order! - if (data == NULL) { - KALDI_ASSERT(num_rows * num_cols == 0); - this->num_rows_ = 0; - this->num_cols_ = 0; - this->stride_ = 0; - } else { - KALDI_ASSERT(this->stride_ >= this->num_cols_); - } + MatrixIndexT stride) + : MatrixBase( + data, num_cols, num_rows, stride) { // caution: reversed order! + if (data == NULL) { + KALDI_ASSERT(num_rows * num_cols == 0); + this->num_rows_ = 0; + this->num_cols_ = 0; + this->stride_ = 0; + } else { + KALDI_ASSERT(this->stride_ >= this->num_cols_); + } } /* @@ -1665,9 +1722,11 @@ Real MatrixBase::Cond() const { KALDI_ASSERT(num_rows_ > 0&&num_cols_ > 0); Vector singular_values(std::min(num_rows_, num_cols_)); Svd(&singular_values); // Get singular values... - Real min = singular_values(0), max = singular_values(0); // both absolute values... + Real min = singular_values(0), max = singular_values(0); // both absolute +values... for (MatrixIndexT i = 1;i < singular_values.Dim();i++) { - min = std::min((Real)std::abs(singular_values(i)), min); max = std::max((Real)std::abs(singular_values(i)), max); + min = std::min((Real)std::abs(singular_values(i)), min); max = +std::max((Real)std::abs(singular_values(i)), max); } if (min > 0) return max/min; else return std::numeric_limits::infinity(); @@ -1677,7 +1736,8 @@ template Real MatrixBase::Trace(bool check_square) const { KALDI_ASSERT(!check_square || num_rows_ == num_cols_); Real ans = 0.0; - for (MatrixIndexT r = 0;r < std::min(num_rows_, num_cols_);r++) ans += data_ [r + stride_*r]; + for (MatrixIndexT r = 0;r < std::min(num_rows_, num_cols_);r++) ans += data_ +[r + stride_*r]; return ans; } @@ -1707,22 +1767,29 @@ Real MatrixBase::Min() const { template void MatrixBase::AddMatMatMat(Real alpha, - const MatrixBase &A, MatrixTransposeType transA, - const MatrixBase &B, MatrixTransposeType transB, - const MatrixBase &C, MatrixTransposeType transC, + const MatrixBase &A, +MatrixTransposeType transA, + const MatrixBase &B, +MatrixTransposeType transB, + const MatrixBase &C, +MatrixTransposeType transC, Real beta) { - // Note on time taken with different orders of computation. Assume not transposed in this / - // discussion. Firstly, normalize expressions using A.NumCols == B.NumRows and B.NumCols == C.NumRows, prefer + // Note on time taken with different orders of computation. Assume not +transposed in this / + // discussion. Firstly, normalize expressions using A.NumCols == B.NumRows and +B.NumCols == C.NumRows, prefer // rows where there is a choice. // time taken for (AB) is: A.NumRows*B.NumRows*C.Rows // time taken for (AB)C is A.NumRows*C.NumRows*C.Cols - // so this order is A.NumRows*B.NumRows*C.NumRows + A.NumRows*C.NumRows*C.NumCols. + // so this order is A.NumRows*B.NumRows*C.NumRows + +A.NumRows*C.NumRows*C.NumCols. // time taken for (BC) is: B.NumRows*C.NumRows*C.Cols // time taken for A(BC) is: A.NumRows*B.NumRows*C.Cols // so this order is B.NumRows*C.NumRows*C.NumCols + A.NumRows*B.NumRows*C.Cols - MatrixIndexT ARows = A.num_rows_, ACols = A.num_cols_, BRows = B.num_rows_, BCols = B.num_cols_, + MatrixIndexT ARows = A.num_rows_, ACols = A.num_cols_, BRows = B.num_rows_, +BCols = B.num_cols_, CRows = C.num_rows_, CCols = C.num_cols_; if (transA == kTrans) std::swap(ARows, ACols); if (transB == kTrans) std::swap(BRows, BCols); @@ -1746,58 +1813,71 @@ void MatrixBase::AddMatMatMat(Real alpha, template -void MatrixBase::DestructiveSvd(VectorBase *s, MatrixBase *U, MatrixBase *Vt) { +void MatrixBase::DestructiveSvd(VectorBase *s, MatrixBase *U, +MatrixBase *Vt) { // Svd, *this = U*diag(s)*Vt. // With (*this).num_rows_ == m, (*this).num_cols_ == n, - // Support only skinny Svd with m>=n (NumRows>=NumCols), and zero sizes for U and Vt mean + // Support only skinny Svd with m>=n (NumRows>=NumCols), and zero sizes for U +and Vt mean // we do not want that output. We expect that s.Dim() == m, // U is either 0 by 0 or m by n, and rv is either 0 by 0 or n by n. // Throws exception on error. - KALDI_ASSERT(num_rows_>=num_cols_ && "Svd requires that #rows by >= #cols."); // For compatibility with JAMA code. + KALDI_ASSERT(num_rows_>=num_cols_ && "Svd requires that #rows by >= #cols."); +// For compatibility with JAMA code. KALDI_ASSERT(s->Dim() == num_cols_); // s should be the smaller dim. - KALDI_ASSERT(U == NULL || (U->num_rows_ == num_rows_&&U->num_cols_ == num_cols_)); - KALDI_ASSERT(Vt == NULL || (Vt->num_rows_ == num_cols_&&Vt->num_cols_ == num_cols_)); + KALDI_ASSERT(U == NULL || (U->num_rows_ == num_rows_&&U->num_cols_ == +num_cols_)); + KALDI_ASSERT(Vt == NULL || (Vt->num_rows_ == num_cols_&&Vt->num_cols_ == +num_cols_)); Real prescale = 1.0; - if ( std::abs((*this)(0, 0) ) < 1.0e-30) { // Very tiny value... can cause problems in Svd. + if ( std::abs((*this)(0, 0) ) < 1.0e-30) { // Very tiny value... can cause +problems in Svd. Real max_elem = LargestAbsElem(); if (max_elem != 0) { prescale = 1.0 / max_elem; - if (std::abs(prescale) == std::numeric_limits::infinity()) { prescale = 1.0e+40; } + if (std::abs(prescale) == std::numeric_limits::infinity()) { +prescale = 1.0e+40; } (*this).Scale(prescale); } } #if !defined(HAVE_ATLAS) && !defined(USE_KALDI_SVD) - // "S" == skinny Svd (only one we support because of compatibility with Jama one which is only skinny), + // "S" == skinny Svd (only one we support because of compatibility with Jama +one which is only skinny), // "N"== no eigenvectors wanted. LapackGesvd(s, U, Vt); #else /* if (num_rows_ > 1 && num_cols_ > 1 && (*this)(0, 0) == (*this)(1, 1) - && Max() == Min() && (*this)(0, 0) != 0.0) { // special case that JamaSvd sometimes crashes on. - KALDI_WARN << "Jama SVD crashes on this type of matrix, perturbing it to prevent crash."; + && Max() == Min() && (*this)(0, 0) != 0.0) { // special case that JamaSvd +sometimes crashes on. + KALDI_WARN << "Jama SVD crashes on this type of matrix, perturbing it to +prevent crash."; for(int32 i = 0; i < NumRows(); i++) (*this)(i, i) *= 1.00001; }*/ // bool ans = JamaSvd(s, U, Vt); - //if (Vt != NULL) Vt->Transpose(); // possibly to do: change this and also the transpose inside the JamaSvd routine. note, Vt is square. - //if (!ans) { - //KALDI_ERR << "Error doing Svd"; // This one will be caught. - //} +// if (Vt != NULL) Vt->Transpose(); // possibly to do: change this and also the +// transpose inside the JamaSvd routine. note, Vt is square. +// if (!ans) { +// KALDI_ERR << "Error doing Svd"; // This one will be caught. +//} //#endif - //if (prescale != 1.0) s->Scale(1.0/prescale); +// if (prescale != 1.0) s->Scale(1.0/prescale); //} /* template -void MatrixBase::Svd(VectorBase *s, MatrixBase *U, MatrixBase *Vt) const { +void MatrixBase::Svd(VectorBase *s, MatrixBase *U, +MatrixBase *Vt) const { try { if (num_rows_ >= num_cols_) { Matrix tmp(*this); tmp.DestructiveSvd(s, U, Vt); } else { Matrix tmp(*this, kTrans); // transpose of *this. - // rVt will have different dim so cannot transpose in-place --> use a temp matrix. + // rVt will have different dim so cannot transpose in-place --> use a temp +matrix. Matrix Vt_Trans(Vt ? Vt->num_cols_ : 0, Vt ? Vt->num_rows_ : 0); // U will be transpose tmp.DestructiveSvd(s, Vt ? &Vt_Trans : NULL, U); @@ -1806,7 +1886,8 @@ void MatrixBase::Svd(VectorBase *s, MatrixBase *U, MatrixBase< } } catch (...) { KALDI_ERR << "Error doing Svd (did not converge), first part of matrix is\n" - << SubMatrix(*this, 0, std::min((MatrixIndexT)10, num_rows_), + << SubMatrix(*this, 0, std::min((MatrixIndexT)10, +num_rows_), 0, std::min((MatrixIndexT)10, num_cols_)) << ", min and max are: " << Min() << ", " << Max(); } @@ -1819,7 +1900,8 @@ bool MatrixBase::IsSymmetric(Real cutoff) const { Real bad_sum = 0.0, good_sum = 0.0; for (MatrixIndexT i = 0;i < R;i++) { for (MatrixIndexT j = 0;j < i;j++) { - Real a = (*this)(i, j), b = (*this)(j, i), avg = 0.5*(a+b), diff = 0.5*(a-b); + Real a = (*this)(i, j), b = (*this)(j, i), avg = 0.5*(a+b), diff = +0.5*(a-b); good_sum += std::abs(avg); bad_sum += std::abs(diff); } good_sum += std::abs((*this)(i, i)); @@ -1860,7 +1942,8 @@ bool MatrixBase::IsUnit(Real cutoff) const { Real bad_max = 0.0; for (MatrixIndexT i = 0; i < R;i++) for (MatrixIndexT j = 0; j < C;j++) - bad_max = std::max(bad_max, static_cast(std::abs( (*this)(i, j) - (i == j?1.0:0.0)))); + bad_max = std::max(bad_max, static_cast(std::abs( (*this)(i, j) - (i +== j?1.0:0.0)))); return (bad_max <= cutoff); } @@ -1880,7 +1963,8 @@ Real MatrixBase::FrobeniusNorm() const{ } template -bool MatrixBase::ApproxEqual(const MatrixBase &other, float tol) const { +bool MatrixBase::ApproxEqual(const MatrixBase &other, float tol) +const { if (num_rows_ != other.num_rows_ || num_cols_ != other.num_cols_) KALDI_ERR << "ApproxEqual: size mismatch."; Matrix tmp(*this); @@ -1953,27 +2037,35 @@ void MatrixBase::OrthogonalizeRows() { } -// Uses Svd to compute the eigenvalue decomposition of a symmetric positive semidefinite +// Uses Svd to compute the eigenvalue decomposition of a symmetric positive +semidefinite // matrix: -// (*this) = rU * diag(rs) * rU^T, with rU an orthogonal matrix so rU^{-1} = rU^T. -// Does this by computing svd (*this) = U diag(rs) V^T ... answer is just U diag(rs) U^T. -// Throws exception if this failed to within supplied precision (typically because *this was not +// (*this) = rU * diag(rs) * rU^T, with rU an orthogonal matrix so rU^{-1} = +rU^T. +// Does this by computing svd (*this) = U diag(rs) V^T ... answer is just U +diag(rs) U^T. +// Throws exception if this failed to within supplied precision (typically +because *this was not // symmetric positive definite). template -void MatrixBase::SymPosSemiDefEig(VectorBase *rs, MatrixBase *rU, Real check_thresh) // e.g. check_thresh = 0.001 +void MatrixBase::SymPosSemiDefEig(VectorBase *rs, MatrixBase +*rU, Real check_thresh) // e.g. check_thresh = 0.001 { const MatrixIndexT D = num_rows_; KALDI_ASSERT(num_rows_ == num_cols_); - KALDI_ASSERT(IsSymmetric() && "SymPosSemiDefEig: expecting input to be symmetrical."); + KALDI_ASSERT(IsSymmetric() && "SymPosSemiDefEig: expecting input to be +symmetrical."); KALDI_ASSERT(rU->num_rows_ == D && rU->num_cols_ == D && rs->Dim() == D); Matrix Vt(D, D); Svd(rs, rU, &Vt); - // First just zero any singular values if the column of U and V do not have +ve dot product-- - // this may mean we have small negative eigenvalues, and if we zero them the result will be closer to correct. + // First just zero any singular values if the column of U and V do not have ++ve dot product-- + // this may mean we have small negative eigenvalues, and if we zero them the +result will be closer to correct. for (MatrixIndexT i = 0;i < D;i++) { Real sum = 0.0; for (MatrixIndexT j = 0;j < D;j++) sum += (*rU)(j, i) * Vt(i, j); @@ -1992,9 +2084,12 @@ void MatrixBase::SymPosSemiDefEig(VectorBase *rs, MatrixBase * if (!(old_norm == 0 && new_norm == 0)) { float diff_norm = tmpThisFull.FrobeniusNorm(); - if (std::abs(new_norm-old_norm) > old_norm*check_thresh || diff_norm > old_norm*check_thresh) { - KALDI_WARN << "SymPosSemiDefEig seems to have failed " << diff_norm << " !<< " - << check_thresh << "*" << old_norm << ", maybe matrix was not " + if (std::abs(new_norm-old_norm) > old_norm*check_thresh || diff_norm > +old_norm*check_thresh) { + KALDI_WARN << "SymPosSemiDefEig seems to have failed " << diff_norm << " +!<< " + << check_thresh << "*" << old_norm << ", maybe matrix was not +" << "positive semi definite. Continuing anyway."; } } @@ -2006,7 +2101,8 @@ template Real MatrixBase::LogDet(Real *det_sign) const { Real log_det; Matrix tmp(*this); - tmp.Invert(&log_det, det_sign, false); // false== output not needed (saves some computation). + tmp.Invert(&log_det, det_sign, false); // false== output not needed (saves +some computation). return log_det; } @@ -2022,26 +2118,25 @@ void MatrixBase::InvertDouble(Real *log_det, Real *det_sign, } */ -//template -//void MatrixBase::CopyFromMat(const CompressedMatrix &mat) { - //mat.CopyToMat(this); +// template +// void MatrixBase::CopyFromMat(const CompressedMatrix &mat) { +// mat.CopyToMat(this); //} -//template -//Matrix::Matrix(const CompressedMatrix &M): MatrixBase() { - //Resize(M.NumRows(), M.NumCols(), kUndefined); - //M.CopyToMat(this); +// template +// Matrix::Matrix(const CompressedMatrix &M): MatrixBase() { +// Resize(M.NumRows(), M.NumCols(), kUndefined); +// M.CopyToMat(this); //} - -template +template void MatrixBase::InvertElements() { - for (MatrixIndexT r = 0; r < num_rows_; r++) { - for (MatrixIndexT c = 0; c < num_cols_; c++) { - (*this)(r, c) = static_cast(1.0 / (*this)(r, c)); + for (MatrixIndexT r = 0; r < num_rows_; r++) { + for (MatrixIndexT c = 0; c < num_cols_; c++) { + (*this)(r, c) = static_cast(1.0 / (*this)(r, c)); + } } - } } /* template @@ -2108,7 +2203,8 @@ void MatrixBase::Pow(const MatrixBase &src, Real power) { } template -void MatrixBase::PowAbs(const MatrixBase &src, Real power, bool include_sign) { +void MatrixBase::PowAbs(const MatrixBase &src, Real power, bool +include_sign) { KALDI_ASSERT(SameDim(*this, src)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_; Real *row_data = data_; @@ -2117,9 +2213,9 @@ void MatrixBase::PowAbs(const MatrixBase &src, Real power, bool incl row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col ++) { if (include_sign == true && src_row_data[col] < 0) { - row_data[col] = -pow(std::abs(src_row_data[col]), power); + row_data[col] = -pow(std::abs(src_row_data[col]), power); } else { - row_data[col] = pow(std::abs(src_row_data[col]), power); + row_data[col] = pow(std::abs(src_row_data[col]), power); } } } @@ -2134,7 +2230,8 @@ void MatrixBase::Floor(const MatrixBase &src, Real floor_val) { for (MatrixIndexT row = 0; row < num_rows; row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col++) - row_data[col] = (src_row_data[col] < floor_val ? floor_val : src_row_data[col]); + row_data[col] = (src_row_data[col] < floor_val ? floor_val : +src_row_data[col]); } } @@ -2147,7 +2244,8 @@ void MatrixBase::Ceiling(const MatrixBase &src, Real ceiling_val) { for (MatrixIndexT row = 0; row < num_rows; row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col++) - row_data[col] = (src_row_data[col] > ceiling_val ? ceiling_val : src_row_data[col]); + row_data[col] = (src_row_data[col] > ceiling_val ? ceiling_val : +src_row_data[col]); } } @@ -2173,12 +2271,14 @@ void MatrixBase::ExpSpecial(const MatrixBase &src) { for (MatrixIndexT row = 0; row < num_rows; row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col++) - row_data[col] = (src_row_data[col] < Real(0) ? kaldi::Exp(src_row_data[col]) : (src_row_data[col] + Real(1))); + row_data[col] = (src_row_data[col] < Real(0) ? +kaldi::Exp(src_row_data[col]) : (src_row_data[col] + Real(1))); } } template -void MatrixBase::ExpLimited(const MatrixBase &src, Real lower_limit, Real upper_limit) { +void MatrixBase::ExpLimited(const MatrixBase &src, Real lower_limit, +Real upper_limit) { KALDI_ASSERT(SameDim(*this, src)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_; Real *row_data = data_; @@ -2188,11 +2288,11 @@ void MatrixBase::ExpLimited(const MatrixBase &src, Real lower_limit, for (MatrixIndexT col = 0; col < num_cols; col++) { const Real x = src_row_data[col]; if (!(x >= lower_limit)) - row_data[col] = kaldi::Exp(lower_limit); + row_data[col] = kaldi::Exp(lower_limit); else if (x > upper_limit) - row_data[col] = kaldi::Exp(upper_limit); + row_data[col] = kaldi::Exp(upper_limit); else - row_data[col] = kaldi::Exp(x); + row_data[col] = kaldi::Exp(x); } } } @@ -2220,12 +2320,12 @@ bool MatrixBase::Power(Real power) { return true; } */ -template +template void Matrix::Swap(Matrix *other) { - std::swap(this->data_, other->data_); - std::swap(this->num_cols_, other->num_cols_); - std::swap(this->num_rows_, other->num_rows_); - std::swap(this->stride_, other->stride_); + std::swap(this->data_, other->data_); + std::swap(this->num_cols_, other->num_cols_); + std::swap(this->num_rows_, other->num_rows_); + std::swap(this->stride_, other->stride_); } /* // Repeating this comment that appeared in the header: @@ -2238,12 +2338,14 @@ void Matrix::Swap(Matrix *other) { // be block diagonal, with 2x2 blocks corresponding to any such pairs. If a // pair is lambda +- i*mu, D will have a corresponding 2x2 block // [lambda, mu; -mu, lambda]. -// Note that if the input matrix (*this) is non-invertible, P may not be invertible +// Note that if the input matrix (*this) is non-invertible, P may not be +invertible // so in this case instead of the equation (*this) = P D P^{-1} holding, we have // instead (*this) P = P D. // // By making the pointer arguments non-NULL or NULL, the user can choose to take -// not to take the eigenvalues directly, and/or the matrix D which is block-diagonal +// not to take the eigenvalues directly, and/or the matrix D which is +block-diagonal // with 2x2 blocks. template void MatrixBase::Eig(MatrixBase *P, @@ -2369,7 +2471,8 @@ template bool ReadHtk(std::istream &is, Matrix *M, HtkHeader *header_ptr); template -bool WriteHtk(std::ostream &os, const MatrixBase &M, HtkHeader htk_hdr) // header may be derived from a previous call to ReadHtk. Must be in binary mode. +bool WriteHtk(std::ostream &os, const MatrixBase &M, HtkHeader htk_hdr) // +header may be derived from a previous call to ReadHtk. Must be in binary mode. { KALDI_ASSERT(M.NumRows() == static_cast(htk_hdr.mNSamples)); KALDI_ASSERT(M.NumCols() == static_cast(htk_hdr.mSampleSize) / @@ -2471,12 +2574,14 @@ template Real TraceMatMatMat(const MatrixBase &A, MatrixTransposeType transA, const MatrixBase &B, MatrixTransposeType transB, const MatrixBase &C, MatrixTransposeType transC) { - MatrixIndexT ARows = A.NumRows(), ACols = A.NumCols(), BRows = B.NumRows(), BCols = B.NumCols(), + MatrixIndexT ARows = A.NumRows(), ACols = A.NumCols(), BRows = B.NumRows(), +BCols = B.NumCols(), CRows = C.NumRows(), CCols = C.NumCols(); if (transA == kTrans) std::swap(ARows, ACols); if (transB == kTrans) std::swap(BRows, BCols); if (transC == kTrans) std::swap(CRows, CCols); - KALDI_ASSERT( CCols == ARows && ACols == BRows && BCols == CRows && "TraceMatMatMat: args have mismatched dimensions."); + KALDI_ASSERT( CCols == ARows && ACols == BRows && BCols == CRows && +"TraceMatMatMat: args have mismatched dimensions."); if (ARows*BCols < std::min(BRows*CCols, CRows*ACols)) { Matrix AB(ARows, BCols); AB.AddMatMat(1.0, A, transA, B, transB, 0.0); // AB = A * B. @@ -2508,13 +2613,16 @@ Real TraceMatMatMatMat(const MatrixBase &A, MatrixTransposeType transA, const MatrixBase &B, MatrixTransposeType transB, const MatrixBase &C, MatrixTransposeType transC, const MatrixBase &D, MatrixTransposeType transD) { - MatrixIndexT ARows = A.NumRows(), ACols = A.NumCols(), BRows = B.NumRows(), BCols = B.NumCols(), - CRows = C.NumRows(), CCols = C.NumCols(), DRows = D.NumRows(), DCols = D.NumCols(); + MatrixIndexT ARows = A.NumRows(), ACols = A.NumCols(), BRows = B.NumRows(), +BCols = B.NumCols(), + CRows = C.NumRows(), CCols = C.NumCols(), DRows = D.NumRows(), DCols = +D.NumCols(); if (transA == kTrans) std::swap(ARows, ACols); if (transB == kTrans) std::swap(BRows, BCols); if (transC == kTrans) std::swap(CRows, CCols); if (transD == kTrans) std::swap(DRows, DCols); - KALDI_ASSERT( DCols == ARows && ACols == BRows && BCols == CRows && CCols == DRows && "TraceMatMatMat: args have mismatched dimensions."); + KALDI_ASSERT( DCols == ARows && ACols == BRows && BCols == CRows && CCols == +DRows && "TraceMatMatMat: args have mismatched dimensions."); if (ARows*BCols < std::min(BRows*CCols, std::min(CRows*DCols, DRows*ACols))) { Matrix AB(ARows, BCols); AB.AddMatMat(1.0, A, transA, B, transB, 0.0); // AB = A * B. @@ -2541,13 +2649,18 @@ float TraceMatMatMatMat(const MatrixBase &A, MatrixTransposeType transA, const MatrixBase &D, MatrixTransposeType transD); template -double TraceMatMatMatMat(const MatrixBase &A, MatrixTransposeType transA, - const MatrixBase &B, MatrixTransposeType transB, - const MatrixBase &C, MatrixTransposeType transC, - const MatrixBase &D, MatrixTransposeType transD); +double TraceMatMatMatMat(const MatrixBase &A, MatrixTransposeType +transA, + const MatrixBase &B, MatrixTransposeType +transB, + const MatrixBase &C, MatrixTransposeType +transC, + const MatrixBase &D, MatrixTransposeType +transD); template void SortSvd(VectorBase *s, MatrixBase *U, - MatrixBase *Vt, bool sort_on_absolute_value) { + MatrixBase *Vt, bool +sort_on_absolute_value) { /// Makes sure the Svd is sorted (from greatest to least absolute value). MatrixIndexT num_singval = s->Dim(); KALDI_ASSERT(U == NULL || U->NumCols() == num_singval); @@ -2589,7 +2702,8 @@ void SortSvd(VectorBase *s, MatrixBase *U, MatrixBase *Vt, bool); template -void CreateEigenvalueMatrix(const VectorBase &re, const VectorBase &im, +void CreateEigenvalueMatrix(const VectorBase &re, const VectorBase +&im, MatrixBase *D) { MatrixIndexT n = re.Dim(); KALDI_ASSERT(im.Dim() == n && D->NumRows() == n && D->NumCols() == n); @@ -2603,7 +2717,8 @@ void CreateEigenvalueMatrix(const VectorBase &re, const VectorBase & } else { // First of a complex pair KALDI_ASSERT(j+1 < n && ApproxEqual(im(j+1), -im(j)) && ApproxEqual(re(j+1), re(j))); - /// if (im(j) < 0.0) KALDI_WARN << "Negative first im part of pair"; // TEMP + /// if (im(j) < 0.0) KALDI_WARN << "Negative first im part of pair"; // +TEMP Real lambda = re(j), mu = im(j); // create 2x2 block [lambda, mu; -mu, lambda] (*D)(j, j) = lambda; @@ -2616,10 +2731,12 @@ void CreateEigenvalueMatrix(const VectorBase &re, const VectorBase & } template -void CreateEigenvalueMatrix(const VectorBase &re, const VectorBase &im, +void CreateEigenvalueMatrix(const VectorBase &re, const VectorBase +&im, MatrixBase *D); template -void CreateEigenvalueMatrix(const VectorBase &re, const VectorBase &im, +void CreateEigenvalueMatrix(const VectorBase &re, const +VectorBase &im, MatrixBase *D); @@ -2660,7 +2777,8 @@ bool AttemptComplexPower(double *x_re, double *x_im, double power); template Real TraceMatMat(const MatrixBase &A, const MatrixBase &B, - MatrixTransposeType trans) { // tr(A B), equivalent to sum of each element of A times same element in B' + MatrixTransposeType trans) { // tr(A B), equivalent to sum of +each element of A times same element in B' MatrixIndexT aStride = A.stride_, bStride = B.stride_; if (trans == kNoTrans) { KALDI_ASSERT(A.NumRows() == B.NumCols() && A.NumCols() == B.NumRows()); @@ -2791,29 +2909,32 @@ void MatrixBase::GroupMax(const MatrixBase &src) { } } */ -template +template void MatrixBase::CopyCols(const MatrixBase &src, const MatrixIndexT *indices) { - KALDI_ASSERT(NumRows() == src.NumRows()); - MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, - this_stride = stride_, src_stride = src.stride_; - Real *this_data = this->data_; - const Real *src_data = src.data_; + KALDI_ASSERT(NumRows() == src.NumRows()); + MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, + this_stride = stride_, src_stride = src.stride_; + Real *this_data = this->data_; + const Real *src_data = src.data_; #ifdef KALDI_PARANOID - MatrixIndexT src_cols = src.NumCols(); - for (MatrixIndexT i = 0; i < num_cols; i++) - KALDI_ASSERT(indices[i] >= -1 && indices[i] < src_cols); + MatrixIndexT src_cols = src.NumCols(); + for (MatrixIndexT i = 0; i < num_cols; i++) + KALDI_ASSERT(indices[i] >= -1 && indices[i] < src_cols); #endif - // For the sake of memory locality we do this row by row, rather - // than doing it column-wise using cublas_Xcopy - for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride, src_data += src_stride) { - const MatrixIndexT *index_ptr = &(indices[0]); - for (MatrixIndexT c = 0; c < num_cols; c++, index_ptr++) { - if (*index_ptr < 0) this_data[c] = 0; - else this_data[c] = src_data[*index_ptr]; + // For the sake of memory locality we do this row by row, rather + // than doing it column-wise using cublas_Xcopy + for (MatrixIndexT r = 0; r < num_rows; + r++, this_data += this_stride, src_data += src_stride) { + const MatrixIndexT *index_ptr = &(indices[0]); + for (MatrixIndexT c = 0; c < num_cols; c++, index_ptr++) { + if (*index_ptr < 0) + this_data[c] = 0; + else + this_data[c] = src_data[*index_ptr]; + } } - } } /* @@ -2833,7 +2954,8 @@ void MatrixBase::AddCols(const MatrixBase &src, // For the sake of memory locality we do this row by row, rather // than doing it column-wise using cublas_Xcopy - for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride, src_data += src_stride) { + for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride, src_data ++= src_stride) { const MatrixIndexT *index_ptr = &(indices[0]); for (MatrixIndexT c = 0; c < num_cols; c++, index_ptr++) { if (*index_ptr >= 0) @@ -2965,7 +3087,8 @@ void MatrixBase::DiffSigmoid(const MatrixBase &value, const MatrixBase &diff) { KALDI_ASSERT(SameDim(*this, value) && SameDim(*this, diff)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, - stride = stride_, value_stride = value.stride_, diff_stride = diff.stride_; + stride = stride_, value_stride = value.stride_, diff_stride = +diff.stride_; Real *data = data_; const Real *value_data = value.data_, *diff_data = diff.data_; for (MatrixIndexT r = 0; r < num_rows; r++) { @@ -2982,7 +3105,8 @@ void MatrixBase::DiffTanh(const MatrixBase &value, const MatrixBase &diff) { KALDI_ASSERT(SameDim(*this, value) && SameDim(*this, diff)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, - stride = stride_, value_stride = value.stride_, diff_stride = diff.stride_; + stride = stride_, value_stride = value.stride_, diff_stride = +diff.stride_; Real *data = data_; const Real *value_data = value.data_, *diff_data = diff.data_; for (MatrixIndexT r = 0; r < num_rows; r++) { @@ -2997,7 +3121,8 @@ void MatrixBase::DiffTanh(const MatrixBase &value, /* template template -void MatrixBase::AddVecToRows(const Real alpha, const VectorBase &v) { +void MatrixBase::AddVecToRows(const Real alpha, const +VectorBase &v) { const MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, stride = stride_; KALDI_ASSERT(v.Dim() == num_cols); @@ -3028,7 +3153,8 @@ template void MatrixBase::AddVecToRows(const double alpha, template template -void MatrixBase::AddVecToCols(const Real alpha, const VectorBase &v) { +void MatrixBase::AddVecToCols(const Real alpha, const +VectorBase &v) { const MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, stride = stride_; KALDI_ASSERT(v.Dim() == num_rows); @@ -3058,10 +3184,10 @@ template void MatrixBase::AddVecToCols(const double alpha, template void MatrixBase::AddVecToCols(const double alpha, const VectorBase &v); */ -//Explicit instantiation of the classes -//Apparently, it seems to be necessary that the instantiation -//happens at the end of the file. Otherwise, not all the member -//functions will get instantiated. +// Explicit instantiation of the classes +// Apparently, it seems to be necessary that the instantiation +// happens at the end of the file. Otherwise, not all the member +// functions will get instantiated. template class Matrix; template class Matrix; @@ -3070,4 +3196,4 @@ template class MatrixBase; template class SubMatrix; template class SubMatrix; -} // namespace kaldi +} // namespace kaldi diff --git a/runtime/engine/common/matrix/kaldi-matrix.h b/runtime/engine/common/matrix/kaldi-matrix.h index 92274487e..c082a731c 100644 --- a/runtime/engine/common/matrix/kaldi-matrix.h +++ b/runtime/engine/common/matrix/kaldi-matrix.h @@ -38,669 +38,715 @@ namespace kaldi { /// Base class which provides matrix operations not involving resizing /// or allocation. Classes Matrix and SubMatrix inherit from it and take care /// of allocation and resizing. -template +template class MatrixBase { - public: - // so this child can access protected members of other instances. - friend class Matrix; - friend class SubMatrix; - // friend declarations for CUDA matrices (see ../cudamatrix/) - - /// Returns number of rows (or zero for empty matrix). - inline MatrixIndexT NumRows() const { return num_rows_; } - - /// Returns number of columns (or zero for empty matrix). - inline MatrixIndexT NumCols() const { return num_cols_; } - - /// Stride (distance in memory between each row). Will be >= NumCols. - inline MatrixIndexT Stride() const { return stride_; } - - /// Returns size in bytes of the data held by the matrix. - size_t SizeInBytes() const { - return static_cast(num_rows_) * static_cast(stride_) * - sizeof(Real); - } - - /// Gives pointer to raw data (const). - inline const Real* Data() const { - return data_; - } - - /// Gives pointer to raw data (non-const). - inline Real* Data() { return data_; } - - /// Returns pointer to data for one row (non-const) - inline Real* RowData(MatrixIndexT i) { - KALDI_ASSERT(static_cast(i) < - static_cast(num_rows_)); - return data_ + i * stride_; - } - - /// Returns pointer to data for one row (const) - inline const Real* RowData(MatrixIndexT i) const { - KALDI_ASSERT(static_cast(i) < - static_cast(num_rows_)); - return data_ + i * stride_; - } - - /// Indexing operator, non-const - /// (only checks sizes if compiled with -DKALDI_PARANOID) - inline Real& operator() (MatrixIndexT r, MatrixIndexT c) { - KALDI_PARANOID_ASSERT(static_cast(r) < - static_cast(num_rows_) && - static_cast(c) < - static_cast(num_cols_)); - return *(data_ + r * stride_ + c); - } - /// Indexing operator, provided for ease of debugging (gdb doesn't work - /// with parenthesis operator). - Real &Index (MatrixIndexT r, MatrixIndexT c) { return (*this)(r, c); } - - /// Indexing operator, const - /// (only checks sizes if compiled with -DKALDI_PARANOID) - inline const Real operator() (MatrixIndexT r, MatrixIndexT c) const { - KALDI_PARANOID_ASSERT(static_cast(r) < - static_cast(num_rows_) && - static_cast(c) < - static_cast(num_cols_)); - return *(data_ + r * stride_ + c); - } - - /* Basic setting-to-special values functions. */ - - /// Sets matrix to zero. - void SetZero(); - /// Sets all elements to a specific value. - void Set(Real); - /// Sets to zero, except ones along diagonal [for non-square matrices too] - - /// Copy given matrix. (no resize is done). - template - void CopyFromMat(const MatrixBase & M, - MatrixTransposeType trans = kNoTrans); - - /// Copy from compressed matrix. - //void CopyFromMat(const CompressedMatrix &M); - - /// Copy given tpmatrix. (no resize is done). - //template - //void CopyFromTp(const TpMatrix &M, - //MatrixTransposeType trans = kNoTrans); - - /// Copy from CUDA matrix. Implemented in ../cudamatrix/cu-matrix.h - //template - //void CopyFromMat(const CuMatrixBase &M, - //MatrixTransposeType trans = kNoTrans); - - /// This function has two modes of operation. If v.Dim() == NumRows() * - /// NumCols(), then treats the vector as a row-by-row concatenation of a - /// matrix and copies to *this. - /// if v.Dim() == NumCols(), it sets each row of *this to a copy of v. - void CopyRowsFromVec(const VectorBase &v); - - /// This version of CopyRowsFromVec is implemented in ../cudamatrix/cu-vector.cc - //void CopyRowsFromVec(const CuVectorBase &v); - - template - void CopyRowsFromVec(const VectorBase &v); - - /// Copies vector into matrix, column-by-column. - /// Note that rv.Dim() must either equal NumRows()*NumCols() or NumRows(); - /// this has two modes of operation. - void CopyColsFromVec(const VectorBase &v); - - /// Copy vector into specific column of matrix. - void CopyColFromVec(const VectorBase &v, const MatrixIndexT col); - /// Copy vector into specific row of matrix. - void CopyRowFromVec(const VectorBase &v, const MatrixIndexT row); - /// Copy vector into diagonal of matrix. - void CopyDiagFromVec(const VectorBase &v); - - /* Accessing of sub-parts of the matrix. */ - - /// Return specific row of matrix [const]. - inline const SubVector Row(MatrixIndexT i) const { - KALDI_ASSERT(static_cast(i) < - static_cast(num_rows_)); - return SubVector(data_ + (i * stride_), NumCols()); - } - - /// Return specific row of matrix. - inline SubVector Row(MatrixIndexT i) { - KALDI_ASSERT(static_cast(i) < - static_cast(num_rows_)); - return SubVector(data_ + (i * stride_), NumCols()); - } - - /// Return a sub-part of matrix. - inline SubMatrix Range(const MatrixIndexT row_offset, - const MatrixIndexT num_rows, - const MatrixIndexT col_offset, - const MatrixIndexT num_cols) const { - return SubMatrix(*this, row_offset, num_rows, - col_offset, num_cols); - } - inline SubMatrix RowRange(const MatrixIndexT row_offset, - const MatrixIndexT num_rows) const { - return SubMatrix(*this, row_offset, num_rows, 0, num_cols_); - } - inline SubMatrix ColRange(const MatrixIndexT col_offset, - const MatrixIndexT num_cols) const { - return SubMatrix(*this, 0, num_rows_, col_offset, num_cols); - } - -/* - /// Returns sum of all elements in matrix. - Real Sum() const; - /// Returns trace of matrix. - Real Trace(bool check_square = true) const; - // If check_square = true, will crash if matrix is not square. - - /// Returns maximum element of matrix. - Real Max() const; - /// Returns minimum element of matrix. - Real Min() const; - - /// Element by element multiplication with a given matrix. - void MulElements(const MatrixBase &A); - - /// Divide each element by the corresponding element of a given matrix. - void DivElements(const MatrixBase &A); - - /// Multiply each element with a scalar value. - void Scale(Real alpha); - - /// Set, element-by-element, *this = max(*this, A) - void Max(const MatrixBase &A); - /// Set, element-by-element, *this = min(*this, A) - void Min(const MatrixBase &A); - - /// Equivalent to (*this) = (*this) * diag(scale). Scaling - /// each column by a scalar taken from that dimension of the vector. - void MulColsVec(const VectorBase &scale); - - /// Equivalent to (*this) = diag(scale) * (*this). Scaling - /// each row by a scalar taken from that dimension of the vector. - void MulRowsVec(const VectorBase &scale); - - /// Divide each row into src.NumCols() equal groups, and then scale i'th row's - /// j'th group of elements by src(i, j). Requires src.NumRows() == - /// this->NumRows() and this->NumCols() % src.NumCols() == 0. - void MulRowsGroupMat(const MatrixBase &src); - - /// Returns logdet of matrix. - Real LogDet(Real *det_sign = NULL) const; - - /// matrix inverse. - /// if inverse_needed = false, will fill matrix with garbage. - /// (only useful if logdet wanted). - void Invert(Real *log_det = NULL, Real *det_sign = NULL, - bool inverse_needed = true); - /// matrix inverse [double]. - /// if inverse_needed = false, will fill matrix with garbage - /// (only useful if logdet wanted). - /// Does inversion in double precision even if matrix was not double. - void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL, - bool inverse_needed = true); -*/ - /// Inverts all the elements of the matrix - void InvertElements(); -/* - /// Transpose the matrix. This one is only - /// applicable to square matrices (the one in the - /// Matrix child class works also for non-square. - void Transpose(); - -*/ - /// Copies column r from column indices[r] of src. - /// As a special case, if indexes[i] == -1, sets column i to zero. - /// all elements of "indices" must be in [-1, src.NumCols()-1], - /// and src.NumRows() must equal this.NumRows() - void CopyCols(const MatrixBase &src, - const MatrixIndexT *indices); - - /// Copies row r from row indices[r] of src (does nothing - /// As a special case, if indexes[i] == -1, sets row i to zero. - /// all elements of "indices" must be in [-1, src.NumRows()-1], - /// and src.NumCols() must equal this.NumCols() - void CopyRows(const MatrixBase &src, - const MatrixIndexT *indices); - - /// Add column indices[r] of src to column r. - /// As a special case, if indexes[i] == -1, skip column i - /// indices.size() must equal this->NumCols(), - /// all elements of "reorder" must be in [-1, src.NumCols()-1], - /// and src.NumRows() must equal this.NumRows() - //void AddCols(const MatrixBase &src, - // const MatrixIndexT *indices); - - /// Copies row r of this matrix from an array of floats at the location given - /// by src[r]. If any src[r] is NULL then this.Row(r) will be set to zero. - /// Note: we are using "pointer to const pointer to const object" for "src", - /// because we may create "src" by calling Data() of const CuArray - void CopyRows(const Real *const *src); - - /// Copies row r of this matrix to the array of floats at the location given - /// by dst[r]. If dst[r] is NULL, does not copy anywhere. Requires that none - /// of the memory regions pointed to by the pointers in "dst" overlap (e.g. - /// none of the pointers should be the same). - void CopyToRows(Real *const *dst) const; - - /// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]). - /// If indexes[r] < 0, does not add anything. all elements of "indexes" must - /// be in [-1, src.NumRows()-1], and src.NumCols() must equal this.NumCols(). - // void AddRows(Real alpha, - // const MatrixBase &src, - // const MatrixIndexT *indexes); - - /// Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as the - /// beginning of a region of memory representing a vector of floats, of the - /// same length as this.NumCols(). If src[r] is NULL, does not add anything. - //void AddRows(Real alpha, const Real *const *src); - - /// For each row r of this matrix, adds it (times alpha) to the array of - /// floats at the location given by dst[r]. If dst[r] is NULL, does not do - /// anything for that row. Requires that none of the memory regions pointed - /// to by the pointers in "dst" overlap (e.g. none of the pointers should be - /// the same). - //void AddToRows(Real alpha, Real *const *dst) const; - - /// For each row i of *this, adds this->Row(i) to - /// dst->Row(indexes(i)) if indexes(i) >= 0, else do nothing. - /// Requires that all the indexes[i] that are >= 0 - /// be distinct, otherwise the behavior is undefined. - //void AddToRows(Real alpha, - // const MatrixIndexT *indexes, + public: + // so this child can access protected members of other instances. + friend class Matrix; + friend class SubMatrix; + // friend declarations for CUDA matrices (see ../cudamatrix/) + + /// Returns number of rows (or zero for empty matrix). + inline MatrixIndexT NumRows() const { return num_rows_; } + + /// Returns number of columns (or zero for empty matrix). + inline MatrixIndexT NumCols() const { return num_cols_; } + + /// Stride (distance in memory between each row). Will be >= NumCols. + inline MatrixIndexT Stride() const { return stride_; } + + /// Returns size in bytes of the data held by the matrix. + size_t SizeInBytes() const { + return static_cast(num_rows_) * static_cast(stride_) * + sizeof(Real); + } + + /// Gives pointer to raw data (const). + inline const Real *Data() const { return data_; } + + /// Gives pointer to raw data (non-const). + inline Real *Data() { return data_; } + + /// Returns pointer to data for one row (non-const) + inline Real *RowData(MatrixIndexT i) { + KALDI_ASSERT(static_cast(i) < + static_cast(num_rows_)); + return data_ + i * stride_; + } + + /// Returns pointer to data for one row (const) + inline const Real *RowData(MatrixIndexT i) const { + KALDI_ASSERT(static_cast(i) < + static_cast(num_rows_)); + return data_ + i * stride_; + } + + /// Indexing operator, non-const + /// (only checks sizes if compiled with -DKALDI_PARANOID) + inline Real &operator()(MatrixIndexT r, MatrixIndexT c) { + KALDI_PARANOID_ASSERT( + static_cast(r) < + static_cast(num_rows_) && + static_cast(c) < + static_cast(num_cols_)); + return *(data_ + r * stride_ + c); + } + /// Indexing operator, provided for ease of debugging (gdb doesn't work + /// with parenthesis operator). + Real &Index(MatrixIndexT r, MatrixIndexT c) { return (*this)(r, c); } + + /// Indexing operator, const + /// (only checks sizes if compiled with -DKALDI_PARANOID) + inline const Real operator()(MatrixIndexT r, MatrixIndexT c) const { + KALDI_PARANOID_ASSERT( + static_cast(r) < + static_cast(num_rows_) && + static_cast(c) < + static_cast(num_cols_)); + return *(data_ + r * stride_ + c); + } + + /* Basic setting-to-special values functions. */ + + /// Sets matrix to zero. + void SetZero(); + /// Sets all elements to a specific value. + void Set(Real); + /// Sets to zero, except ones along diagonal [for non-square matrices too] + + /// Copy given matrix. (no resize is done). + template + void CopyFromMat(const MatrixBase &M, + MatrixTransposeType trans = kNoTrans); + + /// Copy from compressed matrix. + // void CopyFromMat(const CompressedMatrix &M); + + /// Copy given tpmatrix. (no resize is done). + // template + // void CopyFromTp(const TpMatrix &M, + // MatrixTransposeType trans = kNoTrans); + + /// Copy from CUDA matrix. Implemented in ../cudamatrix/cu-matrix.h + // template + // void CopyFromMat(const CuMatrixBase &M, + // MatrixTransposeType trans = kNoTrans); + + /// This function has two modes of operation. If v.Dim() == NumRows() * + /// NumCols(), then treats the vector as a row-by-row concatenation of a + /// matrix and copies to *this. + /// if v.Dim() == NumCols(), it sets each row of *this to a copy of v. + void CopyRowsFromVec(const VectorBase &v); + + /// This version of CopyRowsFromVec is implemented in + /// ../cudamatrix/cu-vector.cc + // void CopyRowsFromVec(const CuVectorBase &v); + + template + void CopyRowsFromVec(const VectorBase &v); + + /// Copies vector into matrix, column-by-column. + /// Note that rv.Dim() must either equal NumRows()*NumCols() or NumRows(); + /// this has two modes of operation. + void CopyColsFromVec(const VectorBase &v); + + /// Copy vector into specific column of matrix. + void CopyColFromVec(const VectorBase &v, const MatrixIndexT col); + /// Copy vector into specific row of matrix. + void CopyRowFromVec(const VectorBase &v, const MatrixIndexT row); + /// Copy vector into diagonal of matrix. + void CopyDiagFromVec(const VectorBase &v); + + /* Accessing of sub-parts of the matrix. */ + + /// Return specific row of matrix [const]. + inline const SubVector Row(MatrixIndexT i) const { + KALDI_ASSERT(static_cast(i) < + static_cast(num_rows_)); + return SubVector(data_ + (i * stride_), NumCols()); + } + + /// Return specific row of matrix. + inline SubVector Row(MatrixIndexT i) { + KALDI_ASSERT(static_cast(i) < + static_cast(num_rows_)); + return SubVector(data_ + (i * stride_), NumCols()); + } + + /// Return a sub-part of matrix. + inline SubMatrix Range(const MatrixIndexT row_offset, + const MatrixIndexT num_rows, + const MatrixIndexT col_offset, + const MatrixIndexT num_cols) const { + return SubMatrix( + *this, row_offset, num_rows, col_offset, num_cols); + } + inline SubMatrix RowRange(const MatrixIndexT row_offset, + const MatrixIndexT num_rows) const { + return SubMatrix(*this, row_offset, num_rows, 0, num_cols_); + } + inline SubMatrix ColRange(const MatrixIndexT col_offset, + const MatrixIndexT num_cols) const { + return SubMatrix(*this, 0, num_rows_, col_offset, num_cols); + } + + /* + /// Returns sum of all elements in matrix. + Real Sum() const; + /// Returns trace of matrix. + Real Trace(bool check_square = true) const; + // If check_square = true, will crash if matrix is not square. + + /// Returns maximum element of matrix. + Real Max() const; + /// Returns minimum element of matrix. + Real Min() const; + + /// Element by element multiplication with a given matrix. + void MulElements(const MatrixBase &A); + + /// Divide each element by the corresponding element of a given matrix. + void DivElements(const MatrixBase &A); + + /// Multiply each element with a scalar value. + void Scale(Real alpha); + + /// Set, element-by-element, *this = max(*this, A) + void Max(const MatrixBase &A); + /// Set, element-by-element, *this = min(*this, A) + void Min(const MatrixBase &A); + + /// Equivalent to (*this) = (*this) * diag(scale). Scaling + /// each column by a scalar taken from that dimension of the vector. + void MulColsVec(const VectorBase &scale); + + /// Equivalent to (*this) = diag(scale) * (*this). Scaling + /// each row by a scalar taken from that dimension of the vector. + void MulRowsVec(const VectorBase &scale); + + /// Divide each row into src.NumCols() equal groups, and then scale i'th + row's + /// j'th group of elements by src(i, j). Requires src.NumRows() == + /// this->NumRows() and this->NumCols() % src.NumCols() == 0. + void MulRowsGroupMat(const MatrixBase &src); + + /// Returns logdet of matrix. + Real LogDet(Real *det_sign = NULL) const; + + /// matrix inverse. + /// if inverse_needed = false, will fill matrix with garbage. + /// (only useful if logdet wanted). + void Invert(Real *log_det = NULL, Real *det_sign = NULL, + bool inverse_needed = true); + /// matrix inverse [double]. + /// if inverse_needed = false, will fill matrix with garbage + /// (only useful if logdet wanted). + /// Does inversion in double precision even if matrix was not double. + void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL, + bool inverse_needed = true); + */ + /// Inverts all the elements of the matrix + void InvertElements(); + /* + /// Transpose the matrix. This one is only + /// applicable to square matrices (the one in the + /// Matrix child class works also for non-square. + void Transpose(); + + */ + /// Copies column r from column indices[r] of src. + /// As a special case, if indexes[i] == -1, sets column i to zero. + /// all elements of "indices" must be in [-1, src.NumCols()-1], + /// and src.NumRows() must equal this.NumRows() + void CopyCols(const MatrixBase &src, const MatrixIndexT *indices); + + /// Copies row r from row indices[r] of src (does nothing + /// As a special case, if indexes[i] == -1, sets row i to zero. + /// all elements of "indices" must be in [-1, src.NumRows()-1], + /// and src.NumCols() must equal this.NumCols() + void CopyRows(const MatrixBase &src, const MatrixIndexT *indices); + + /// Add column indices[r] of src to column r. + /// As a special case, if indexes[i] == -1, skip column i + /// indices.size() must equal this->NumCols(), + /// all elements of "reorder" must be in [-1, src.NumCols()-1], + /// and src.NumRows() must equal this.NumRows() + // void AddCols(const MatrixBase &src, + // const MatrixIndexT *indices); + + /// Copies row r of this matrix from an array of floats at the location + /// given + /// by src[r]. If any src[r] is NULL then this.Row(r) will be set to zero. + /// Note: we are using "pointer to const pointer to const object" for "src", + /// because we may create "src" by calling Data() of const CuArray + void CopyRows(const Real *const *src); + + /// Copies row r of this matrix to the array of floats at the location given + /// by dst[r]. If dst[r] is NULL, does not copy anywhere. Requires that + /// none + /// of the memory regions pointed to by the pointers in "dst" overlap (e.g. + /// none of the pointers should be the same). + void CopyToRows(Real *const *dst) const; + + /// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]). + /// If indexes[r] < 0, does not add anything. all elements of "indexes" must + /// be in [-1, src.NumRows()-1], and src.NumCols() must equal + /// this.NumCols(). + // void AddRows(Real alpha, + // const MatrixBase &src, + // const MatrixIndexT *indexes); + + /// Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as + /// the + /// beginning of a region of memory representing a vector of floats, of the + /// same length as this.NumCols(). If src[r] is NULL, does not add anything. + // void AddRows(Real alpha, const Real *const *src); + + /// For each row r of this matrix, adds it (times alpha) to the array of + /// floats at the location given by dst[r]. If dst[r] is NULL, does not do + /// anything for that row. Requires that none of the memory regions pointed + /// to by the pointers in "dst" overlap (e.g. none of the pointers should be + /// the same). + // void AddToRows(Real alpha, Real *const *dst) const; + + /// For each row i of *this, adds this->Row(i) to + /// dst->Row(indexes(i)) if indexes(i) >= 0, else do nothing. + /// Requires that all the indexes[i] that are >= 0 + /// be distinct, otherwise the behavior is undefined. + // void AddToRows(Real alpha, + // const MatrixIndexT *indexes, // MatrixBase *dst) const; -/* - inline void ApplyPow(Real power) { - this -> Pow(*this, power); - } - - - inline void ApplyPowAbs(Real power, bool include_sign=false) { - this -> PowAbs(*this, power, include_sign); - } - - inline void ApplyHeaviside() { - this -> Heaviside(*this); - } - - inline void ApplyFloor(Real floor_val) { - this -> Floor(*this, floor_val); - } - - inline void ApplyCeiling(Real ceiling_val) { - this -> Ceiling(*this, ceiling_val); - } - - inline void ApplyExp() { - this -> Exp(*this); - } - - inline void ApplyExpSpecial() { - this -> ExpSpecial(*this); - } - - inline void ApplyExpLimited(Real lower_limit, Real upper_limit) { - this -> ExpLimited(*this, lower_limit, upper_limit); - } - - inline void ApplyLog() { - this -> Log(*this); - } -*/ - /// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D - /// P^{-1}. Be careful: the relationship of D to the eigenvalues we output is - /// slightly complicated, due to the need for P to be real. In the symmetric - /// case D is diagonal and real, but in - /// the non-symmetric case there may be complex-conjugate pairs of eigenvalues. - /// In this case, for the equation (*this) = P D P^{-1} to hold, D must actually - /// be block diagonal, with 2x2 blocks corresponding to any such pairs. If a - /// pair is lambda +- i*mu, D will have a corresponding 2x2 block - /// [lambda, mu; -mu, lambda]. - /// Note that if the input matrix (*this) is non-invertible, P may not be invertible - /// so in this case instead of the equation (*this) = P D P^{-1} holding, we have - /// instead (*this) P = P D. - /// - /// The non-member function CreateEigenvalueMatrix creates D from eigs_real and eigs_imag. - //void Eig(MatrixBase *P, - // VectorBase *eigs_real, + /* + inline void ApplyPow(Real power) { + this -> Pow(*this, power); + } + + + inline void ApplyPowAbs(Real power, bool include_sign=false) { + this -> PowAbs(*this, power, include_sign); + } + + inline void ApplyHeaviside() { + this -> Heaviside(*this); + } + + inline void ApplyFloor(Real floor_val) { + this -> Floor(*this, floor_val); + } + + inline void ApplyCeiling(Real ceiling_val) { + this -> Ceiling(*this, ceiling_val); + } + + inline void ApplyExp() { + this -> Exp(*this); + } + + inline void ApplyExpSpecial() { + this -> ExpSpecial(*this); + } + + inline void ApplyExpLimited(Real lower_limit, Real upper_limit) { + this -> ExpLimited(*this, lower_limit, upper_limit); + } + + inline void ApplyLog() { + this -> Log(*this); + } + */ + /// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = + /// P D + /// P^{-1}. Be careful: the relationship of D to the eigenvalues we output + /// is + /// slightly complicated, due to the need for P to be real. In the + /// symmetric + /// case D is diagonal and real, but in + /// the non-symmetric case there may be complex-conjugate pairs of + /// eigenvalues. + /// In this case, for the equation (*this) = P D P^{-1} to hold, D must + /// actually + /// be block diagonal, with 2x2 blocks corresponding to any such pairs. If + /// a + /// pair is lambda +- i*mu, D will have a corresponding 2x2 block + /// [lambda, mu; -mu, lambda]. + /// Note that if the input matrix (*this) is non-invertible, P may not be + /// invertible + /// so in this case instead of the equation (*this) = P D P^{-1} holding, we + /// have + /// instead (*this) P = P D. + /// + /// The non-member function CreateEigenvalueMatrix creates D from eigs_real + /// and eigs_imag. + // void Eig(MatrixBase *P, + // VectorBase *eigs_real, // VectorBase *eigs_imag) const; - /// The Power method attempts to take the matrix to a power using a method that - /// works in general for fractional and negative powers. The input matrix must - /// be invertible and have reasonable condition (or we don't guarantee the - /// results. The method is based on the eigenvalue decomposition. It will - /// return false and leave the matrix unchanged, if at entry the matrix had - /// real negative eigenvalues (or if it had zero eigenvalues and the power was - /// negative). -// bool Power(Real pow); - - /** Singular value decomposition - Major limitations: - For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we return - the "skinny" Svd, i.e. the matrix in the middle is diagonal, and the - one on the left is rectangular. - - In Svd, *this = U*diag(S)*Vt. - Null pointers for U and/or Vt at input mean we do not want that output. We - expect that S.Dim() == m, U is either NULL or m by n, - and v is either NULL or n by n. - The singular values are not sorted (use SortSvd for that). */ - //void DestructiveSvd(VectorBase *s, MatrixBase *U, - // MatrixBase *Vt); // Destroys calling matrix. - - /// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is already - /// transposed; the normal formulation is U diag(s) V^T. - /// Null pointers for U or V mean we don't want that output (this saves - /// compute). The singular values are not sorted (use SortSvd for that). - //void Svd(VectorBase *s, MatrixBase *U, - // MatrixBase *Vt) const; - /// Compute SVD but only retain the singular values. - //void Svd(VectorBase *s) const { Svd(s, NULL, NULL); } - - - /// Returns smallest singular value. - //Real MinSingularValue() const { - // Vector tmp(std::min(NumRows(), NumCols())); - //Svd(&tmp); - //return tmp.Min(); - //} - - //void TestUninitialized() const; // This function is designed so that if any element - // if the matrix is uninitialized memory, valgrind will complain. - - /// Returns condition number by computing Svd. Works even if cols > rows. - /// Returns infinity if all singular values are zero. - /* - Real Cond() const; - - /// Returns true if matrix is Symmetric. - bool IsSymmetric(Real cutoff = 1.0e-05) const; // replace magic number - - /// Returns true if matrix is Diagonal. - bool IsDiagonal(Real cutoff = 1.0e-05) const; // replace magic number - - /// Returns true if the matrix is all zeros, except for ones on diagonal. (it - /// does not have to be square). More specifically, this function returns - /// false if for any i, j, (*this)(i, j) differs by more than cutoff from the - /// expression (i == j ? 1 : 0). - bool IsUnit(Real cutoff = 1.0e-05) const; // replace magic number - - /// Returns true if matrix is all zeros. - bool IsZero(Real cutoff = 1.0e-05) const; // replace magic number - - /// Frobenius norm, which is the sqrt of sum of square elements. Same as Schatten 2-norm, - /// or just "2-norm". - Real FrobeniusNorm() const; - - /// Returns true if ((*this)-other).FrobeniusNorm() - /// <= tol * (*this).FrobeniusNorm(). - bool ApproxEqual(const MatrixBase &other, float tol = 0.01) const; - - /// Tests for exact equality. It's usually preferable to use ApproxEqual. - bool Equal(const MatrixBase &other) const; - - /// largest absolute value. - Real LargestAbsElem() const; // largest absolute value. - - /// Returns log(sum(exp())) without exp overflow - /// If prune > 0.0, it uses a pruning beam, discarding - /// terms less than (max - prune). Note: in future - /// we may change this so that if prune = 0.0, it takes - /// the max, so use -1 if you don't want to prune. - Real LogSumExp(Real prune = -1.0) const; - - /// Apply soft-max to the collection of all elements of the - /// matrix and return normalizer (log sum of exponentials). - Real ApplySoftMax(); - - /// Set each element to the sigmoid of the corresponding element of "src". - void Sigmoid(const MatrixBase &src); - - /// Sets each element to the Heaviside step function (x > 0 ? 1 : 0) of the - /// corresponding element in "src". Note: in general you can make different - /// choices for x = 0, but for now please leave it as it (i.e. returning zero) - /// because it affects the RectifiedLinearComponent in the neural net code. - void Heaviside(const MatrixBase &src); - - void Exp(const MatrixBase &src); - - void Pow(const MatrixBase &src, Real power); - - void Log(const MatrixBase &src); - - /// Apply power to the absolute value of each element. - /// If include_sign is true, the result will be multiplied with - /// the sign of the input value. - /// If the power is negative and the input to the power is zero, - /// The output will be set zero. If include_sign is true, it will - /// multiply the result by the sign of the input. - void PowAbs(const MatrixBase &src, Real power, bool include_sign=false); - - void Floor(const MatrixBase &src, Real floor_val); - - void Ceiling(const MatrixBase &src, Real ceiling_val); - - /// For each element x of the matrix, set it to - /// (x < 0 ? exp(x) : x + 1). This function is used - /// in our RNNLM training. - void ExpSpecial(const MatrixBase &src); - - /// This is equivalent to running: - /// Floor(src, lower_limit); - /// Ceiling(src, upper_limit); - /// Exp(src) - void ExpLimited(const MatrixBase &src, Real lower_limit, Real upper_limit); - - /// Set each element to y = log(1 + exp(x)) - void SoftHinge(const MatrixBase &src); - - /// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 / p). - /// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0. - void GroupPnorm(const MatrixBase &src, Real power); - - /// Calculate derivatives for the GroupPnorm function above... - /// if "input" is the input to the GroupPnorm function above (i.e. the "src" variable), - /// and "output" is the result of the computation (i.e. the "this" of that function - /// call), and *this has the same dimension as "input", then it sets each element - /// of *this to the derivative d(output-elem)/d(input-elem) for each element of "input", where - /// "output-elem" is whichever element of output depends on that input element. - void GroupPnormDeriv(const MatrixBase &input, const MatrixBase &output, - Real power); - - /// Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j - /// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0. - void GroupMax(const MatrixBase &src); - - /// Calculate derivatives for the GroupMax function above, where - /// "input" is the input to the GroupMax function above (i.e. the "src" variable), - /// and "output" is the result of the computation (i.e. the "this" of that function - /// call), and *this must have the same dimension as "input". Each element - /// of *this will be set to 1 if the corresponding input equals the output of - /// the group, and 0 otherwise. The equals the function derivative where it is - /// defined (it's not defined where multiple inputs in the group are equal to the output). - void GroupMaxDeriv(const MatrixBase &input, const MatrixBase &output); - - /// Set each element to the tanh of the corresponding element of "src". - void Tanh(const MatrixBase &src); - - // Function used in backpropagating derivatives of the sigmoid function: - // element-by-element, set *this = diff * value * (1.0 - value). - void DiffSigmoid(const MatrixBase &value, - const MatrixBase &diff); - - // Function used in backpropagating derivatives of the tanh function: - // element-by-element, set *this = diff * (1.0 - value^2). - void DiffTanh(const MatrixBase &value, - const MatrixBase &diff); -*/ - /** Uses Svd to compute the eigenvalue decomposition of a symmetric positive - * semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an - * orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not - * positive semi-definite (check_thresh controls how stringent the check is; - * set it to 2 to ensure it won't ever complain, but it will zero out negative - * dimensions in your matrix. - * - * Caution: if you want the eigenvalues, it may make more sense to convert to - * SpMatrix and use Eig() function there, which uses eigenvalue decomposition - * directly rather than SVD. - */ + /// The Power method attempts to take the matrix to a power using a method + /// that + /// works in general for fractional and negative powers. The input matrix + /// must + /// be invertible and have reasonable condition (or we don't guarantee the + /// results. The method is based on the eigenvalue decomposition. It will + /// return false and leave the matrix unchanged, if at entry the matrix had + /// real negative eigenvalues (or if it had zero eigenvalues and the power + /// was + /// negative). + // bool Power(Real pow); + + /** Singular value decomposition + Major limitations: + For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we + return + the "skinny" Svd, i.e. the matrix in the middle is diagonal, and the + one on the left is rectangular. + + In Svd, *this = U*diag(S)*Vt. + Null pointers for U and/or Vt at input mean we do not want that output. + We + expect that S.Dim() == m, U is either NULL or m by n, + and v is either NULL or n by n. + The singular values are not sorted (use SortSvd for that). */ + // void DestructiveSvd(VectorBase *s, MatrixBase *U, + // MatrixBase *Vt); // Destroys calling matrix. + + /// Compute SVD (*this) = U diag(s) Vt. Note that the V in the call is + /// already + /// transposed; the normal formulation is U diag(s) V^T. + /// Null pointers for U or V mean we don't want that output (this saves + /// compute). The singular values are not sorted (use SortSvd for that). + // void Svd(VectorBase *s, MatrixBase *U, + // MatrixBase *Vt) const; + /// Compute SVD but only retain the singular values. + // void Svd(VectorBase *s) const { Svd(s, NULL, NULL); } + + + /// Returns smallest singular value. + // Real MinSingularValue() const { + // Vector tmp(std::min(NumRows(), NumCols())); + // Svd(&tmp); + // return tmp.Min(); + //} - /// stream read. - /// Use instead of stream<<*this, if you want to add to existing contents. - // Will throw exception on failure. - void Read(std::istream & in, bool binary); - /// write to stream. - void Write(std::ostream & out, bool binary) const; - - // Below is internal methods for Svd, user does not have to know about this. - protected: - - /// Initializer, callable only from child. - explicit MatrixBase(Real *data, MatrixIndexT cols, MatrixIndexT rows, MatrixIndexT stride) : - data_(data), num_cols_(cols), num_rows_(rows), stride_(stride) { - KALDI_ASSERT_IS_FLOATING_TYPE(Real); - } - - /// Initializer, callable only from child. - /// Empty initializer, for un-initialized matrix. - explicit MatrixBase(): data_(NULL) { - KALDI_ASSERT_IS_FLOATING_TYPE(Real); - } - - // Make sure pointers to MatrixBase cannot be deleted. - ~MatrixBase() { } - - /// A workaround that allows SubMatrix to get a pointer to non-const data - /// for const Matrix. Unfortunately C++ does not allow us to declare a - /// "public const" inheritance or anything like that, so it would require - /// a lot of work to make the SubMatrix class totally const-correct-- - /// we would have to override many of the Matrix functions. - inline Real* Data_workaround() const { - return data_; - } - - /// data memory area - Real* data_; - - /// these attributes store the real matrix size as it is stored in memory - /// including memalignment - MatrixIndexT num_cols_; /// < Number of columns - MatrixIndexT num_rows_; /// < Number of rows - /** True number of columns for the internal matrix. This number may differ - * from num_cols_ as memory alignment might be used. */ - MatrixIndexT stride_; - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(MatrixBase); + // void TestUninitialized() const; // This function is designed so that if + // any element + // if the matrix is uninitialized memory, valgrind will complain. + + /// Returns condition number by computing Svd. Works even if cols > rows. + /// Returns infinity if all singular values are zero. + /* + Real Cond() const; + + /// Returns true if matrix is Symmetric. + bool IsSymmetric(Real cutoff = 1.0e-05) const; // replace magic number + + /// Returns true if matrix is Diagonal. + bool IsDiagonal(Real cutoff = 1.0e-05) const; // replace magic number + + /// Returns true if the matrix is all zeros, except for ones on diagonal. + (it + /// does not have to be square). More specifically, this function returns + /// false if for any i, j, (*this)(i, j) differs by more than cutoff from + the + /// expression (i == j ? 1 : 0). + bool IsUnit(Real cutoff = 1.0e-05) const; // replace magic number + + /// Returns true if matrix is all zeros. + bool IsZero(Real cutoff = 1.0e-05) const; // replace magic number + + /// Frobenius norm, which is the sqrt of sum of square elements. Same as + Schatten 2-norm, + /// or just "2-norm". + Real FrobeniusNorm() const; + + /// Returns true if ((*this)-other).FrobeniusNorm() + /// <= tol * (*this).FrobeniusNorm(). + bool ApproxEqual(const MatrixBase &other, float tol = 0.01) const; + + /// Tests for exact equality. It's usually preferable to use ApproxEqual. + bool Equal(const MatrixBase &other) const; + + /// largest absolute value. + Real LargestAbsElem() const; // largest absolute value. + + /// Returns log(sum(exp())) without exp overflow + /// If prune > 0.0, it uses a pruning beam, discarding + /// terms less than (max - prune). Note: in future + /// we may change this so that if prune = 0.0, it takes + /// the max, so use -1 if you don't want to prune. + Real LogSumExp(Real prune = -1.0) const; + + /// Apply soft-max to the collection of all elements of the + /// matrix and return normalizer (log sum of exponentials). + Real ApplySoftMax(); + + /// Set each element to the sigmoid of the corresponding element of "src". + void Sigmoid(const MatrixBase &src); + + /// Sets each element to the Heaviside step function (x > 0 ? 1 : 0) of the + /// corresponding element in "src". Note: in general you can make different + /// choices for x = 0, but for now please leave it as it (i.e. returning + zero) + /// because it affects the RectifiedLinearComponent in the neural net code. + void Heaviside(const MatrixBase &src); + + void Exp(const MatrixBase &src); + + void Pow(const MatrixBase &src, Real power); + + void Log(const MatrixBase &src); + + /// Apply power to the absolute value of each element. + /// If include_sign is true, the result will be multiplied with + /// the sign of the input value. + /// If the power is negative and the input to the power is zero, + /// The output will be set zero. If include_sign is true, it will + /// multiply the result by the sign of the input. + void PowAbs(const MatrixBase &src, Real power, bool + include_sign=false); + + void Floor(const MatrixBase &src, Real floor_val); + + void Ceiling(const MatrixBase &src, Real ceiling_val); + + /// For each element x of the matrix, set it to + /// (x < 0 ? exp(x) : x + 1). This function is used + /// in our RNNLM training. + void ExpSpecial(const MatrixBase &src); + + /// This is equivalent to running: + /// Floor(src, lower_limit); + /// Ceiling(src, upper_limit); + /// Exp(src) + void ExpLimited(const MatrixBase &src, Real lower_limit, Real + upper_limit); + + /// Set each element to y = log(1 + exp(x)) + void SoftHinge(const MatrixBase &src); + + /// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 / + p). + /// Requires src.NumRows() == this->NumRows() and src.NumCols() % + this->NumCols() == 0. + void GroupPnorm(const MatrixBase &src, Real power); + + /// Calculate derivatives for the GroupPnorm function above... + /// if "input" is the input to the GroupPnorm function above (i.e. the "src" + variable), + /// and "output" is the result of the computation (i.e. the "this" of that + function + /// call), and *this has the same dimension as "input", then it sets each + element + /// of *this to the derivative d(output-elem)/d(input-elem) for each element + of "input", where + /// "output-elem" is whichever element of output depends on that input + element. + void GroupPnormDeriv(const MatrixBase &input, const MatrixBase + &output, + Real power); + + /// Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j + /// Requires src.NumRows() == this->NumRows() and src.NumCols() % + this->NumCols() == 0. + void GroupMax(const MatrixBase &src); + + /// Calculate derivatives for the GroupMax function above, where + /// "input" is the input to the GroupMax function above (i.e. the "src" + variable), + /// and "output" is the result of the computation (i.e. the "this" of that + function + /// call), and *this must have the same dimension as "input". Each element + /// of *this will be set to 1 if the corresponding input equals the output + of + /// the group, and 0 otherwise. The equals the function derivative where it + is + /// defined (it's not defined where multiple inputs in the group are equal + to the output). + void GroupMaxDeriv(const MatrixBase &input, const MatrixBase + &output); + + /// Set each element to the tanh of the corresponding element of "src". + void Tanh(const MatrixBase &src); + + // Function used in backpropagating derivatives of the sigmoid function: + // element-by-element, set *this = diff * value * (1.0 - value). + void DiffSigmoid(const MatrixBase &value, + const MatrixBase &diff); + + // Function used in backpropagating derivatives of the tanh function: + // element-by-element, set *this = diff * (1.0 - value^2). + void DiffTanh(const MatrixBase &value, + const MatrixBase &diff); + */ + /** Uses Svd to compute the eigenvalue decomposition of a symmetric positive + * semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an + * orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not + * positive semi-definite (check_thresh controls how stringent the check is; + * set it to 2 to ensure it won't ever complain, but it will zero out + * negative + * dimensions in your matrix. + * + * Caution: if you want the eigenvalues, it may make more sense to convert + * to + * SpMatrix and use Eig() function there, which uses eigenvalue + * decomposition + * directly rather than SVD. + */ + + /// stream read. + /// Use instead of stream<<*this, if you want to add to existing contents. + // Will throw exception on failure. + void Read(std::istream &in, bool binary); + /// write to stream. + void Write(std::ostream &out, bool binary) const; + + // Below is internal methods for Svd, user does not have to know about this. + protected: + /// Initializer, callable only from child. + explicit MatrixBase(Real *data, + MatrixIndexT cols, + MatrixIndexT rows, + MatrixIndexT stride) + : data_(data), num_cols_(cols), num_rows_(rows), stride_(stride) { + KALDI_ASSERT_IS_FLOATING_TYPE(Real); + } + + /// Initializer, callable only from child. + /// Empty initializer, for un-initialized matrix. + explicit MatrixBase() : data_(NULL) { KALDI_ASSERT_IS_FLOATING_TYPE(Real); } + + // Make sure pointers to MatrixBase cannot be deleted. + ~MatrixBase() {} + + /// A workaround that allows SubMatrix to get a pointer to non-const data + /// for const Matrix. Unfortunately C++ does not allow us to declare a + /// "public const" inheritance or anything like that, so it would require + /// a lot of work to make the SubMatrix class totally const-correct-- + /// we would have to override many of the Matrix functions. + inline Real *Data_workaround() const { return data_; } + + /// data memory area + Real *data_; + + /// these attributes store the real matrix size as it is stored in memory + /// including memalignment + MatrixIndexT num_cols_; /// < Number of columns + MatrixIndexT num_rows_; /// < Number of rows + /** True number of columns for the internal matrix. This number may differ + * from num_cols_ as memory alignment might be used. */ + MatrixIndexT stride_; + + private: + KALDI_DISALLOW_COPY_AND_ASSIGN(MatrixBase); }; /// A class for storing matrices. -template +template class Matrix : public MatrixBase { - public: - - /// Empty constructor. - Matrix(); - - /// Basic constructor. - Matrix(const MatrixIndexT r, const MatrixIndexT c, - MatrixResizeType resize_type = kSetZero, - MatrixStrideType stride_type = kDefaultStride): - MatrixBase() { Resize(r, c, resize_type, stride_type); } - - /// Swaps the contents of *this and *other. Shallow swap. - void Swap(Matrix *other); - - /// Constructor from any MatrixBase. Can also copy with transpose. - /// Allocates new memory. - explicit Matrix(const MatrixBase & M, - MatrixTransposeType trans = kNoTrans); + public: + /// Empty constructor. + Matrix(); + + /// Basic constructor. + Matrix(const MatrixIndexT r, + const MatrixIndexT c, + MatrixResizeType resize_type = kSetZero, + MatrixStrideType stride_type = kDefaultStride) + : MatrixBase() { + Resize(r, c, resize_type, stride_type); + } + + /// Swaps the contents of *this and *other. Shallow swap. + void Swap(Matrix *other); + + /// Constructor from any MatrixBase. Can also copy with transpose. + /// Allocates new memory. + explicit Matrix(const MatrixBase &M, + MatrixTransposeType trans = kNoTrans); - /// Same as above, but need to avoid default copy constructor. - Matrix(const Matrix & M); // (cannot make explicit) + /// Same as above, but need to avoid default copy constructor. + Matrix(const Matrix &M); // (cannot make explicit) - /// Copy constructor: as above, but from another type. - template - explicit Matrix(const MatrixBase & M, + /// Copy constructor: as above, but from another type. + template + explicit Matrix(const MatrixBase &M, MatrixTransposeType trans = kNoTrans); - /// Copy constructor taking TpMatrix... - //template - //explicit Matrix(const TpMatrix & M, - //MatrixTransposeType trans = kNoTrans) : MatrixBase() { - //if (trans == kNoTrans) { - //Resize(M.NumRows(), M.NumCols(), kUndefined); - //this->CopyFromTp(M); + /// Copy constructor taking TpMatrix... + // template + // explicit Matrix(const TpMatrix & M, + // MatrixTransposeType trans = kNoTrans) : MatrixBase() { + // if (trans == kNoTrans) { + // Resize(M.NumRows(), M.NumCols(), kUndefined); + // this->CopyFromTp(M); //} else { - //Resize(M.NumCols(), M.NumRows(), kUndefined); - //this->CopyFromTp(M, kTrans); + // Resize(M.NumCols(), M.NumRows(), kUndefined); + // this->CopyFromTp(M, kTrans); + //} //} - //} - - /// read from stream. - // Unlike one in base, allows resizing. - void Read(std::istream & in, bool binary); - - /// Remove a specified row. - void RemoveRow(MatrixIndexT i); - - /// Transpose the matrix. Works for non-square - /// matrices as well as square ones. - //void Transpose(); - - /// Distructor to free matrices. - ~Matrix() { Destroy(); } - - /// Sets matrix to a specified size (zero is OK as long as both r and c are - /// zero). The value of the new data depends on resize_type: - /// -if kSetZero, the new data will be zero - /// -if kUndefined, the new data will be undefined - /// -if kCopyData, the new data will be the same as the old data in any - /// shared positions, and zero elsewhere. - /// - /// You can set stride_type to kStrideEqualNumCols to force the stride - /// to equal the number of columns; by default it is set so that the stride - /// in bytes is a multiple of 16. - /// - /// This function takes time proportional to the number of data elements. - void Resize(const MatrixIndexT r, - const MatrixIndexT c, - MatrixResizeType resize_type = kSetZero, - MatrixStrideType stride_type = kDefaultStride); - - /// Assignment operator that takes MatrixBase. - Matrix &operator = (const MatrixBase &other) { - if (MatrixBase::NumRows() != other.NumRows() || - MatrixBase::NumCols() != other.NumCols()) - Resize(other.NumRows(), other.NumCols(), kUndefined); - MatrixBase::CopyFromMat(other); - return *this; - } - - /// Assignment operator. Needed for inclusion in std::vector. - Matrix &operator = (const Matrix &other) { - if (MatrixBase::NumRows() != other.NumRows() || - MatrixBase::NumCols() != other.NumCols()) - Resize(other.NumRows(), other.NumCols(), kUndefined); - MatrixBase::CopyFromMat(other); - return *this; - } - - - private: - /// Deallocates memory and sets to empty matrix (dimension 0, 0). - void Destroy(); - - /// Init assumes the current class contents are invalid (i.e. junk or have - /// already been freed), and it sets the matrix to newly allocated memory with - /// the specified number of rows and columns. r == c == 0 is acceptable. The data - /// memory contents will be undefined. - void Init(const MatrixIndexT r, - const MatrixIndexT c, - const MatrixStrideType stride_type); + /// read from stream. + // Unlike one in base, allows resizing. + void Read(std::istream &in, bool binary); + + /// Remove a specified row. + void RemoveRow(MatrixIndexT i); + + /// Transpose the matrix. Works for non-square + /// matrices as well as square ones. + // void Transpose(); + + /// Distructor to free matrices. + ~Matrix() { Destroy(); } + + /// Sets matrix to a specified size (zero is OK as long as both r and c are + /// zero). The value of the new data depends on resize_type: + /// -if kSetZero, the new data will be zero + /// -if kUndefined, the new data will be undefined + /// -if kCopyData, the new data will be the same as the old data in any + /// shared positions, and zero elsewhere. + /// + /// You can set stride_type to kStrideEqualNumCols to force the stride + /// to equal the number of columns; by default it is set so that the stride + /// in bytes is a multiple of 16. + /// + /// This function takes time proportional to the number of data elements. + void Resize(const MatrixIndexT r, + const MatrixIndexT c, + MatrixResizeType resize_type = kSetZero, + MatrixStrideType stride_type = kDefaultStride); + + /// Assignment operator that takes MatrixBase. + Matrix &operator=(const MatrixBase &other) { + if (MatrixBase::NumRows() != other.NumRows() || + MatrixBase::NumCols() != other.NumCols()) + Resize(other.NumRows(), other.NumCols(), kUndefined); + MatrixBase::CopyFromMat(other); + return *this; + } + + /// Assignment operator. Needed for inclusion in std::vector. + Matrix &operator=(const Matrix &other) { + if (MatrixBase::NumRows() != other.NumRows() || + MatrixBase::NumCols() != other.NumCols()) + Resize(other.NumRows(), other.NumCols(), kUndefined); + MatrixBase::CopyFromMat(other); + return *this; + } + + + private: + /// Deallocates memory and sets to empty matrix (dimension 0, 0). + void Destroy(); + + /// Init assumes the current class contents are invalid (i.e. junk or have + /// already been freed), and it sets the matrix to newly allocated memory + /// with + /// the specified number of rows and columns. r == c == 0 is acceptable. + /// The data + /// memory contents will be undefined. + void Init(const MatrixIndexT r, + const MatrixIndexT c, + const MatrixStrideType stride_type); }; /// @} end "addtogroup matrix_group" @@ -710,38 +756,38 @@ class Matrix : public MatrixBase { /// A structure containing the HTK header. /// [TODO: change the style of the variables to Kaldi-compliant] -template +template class SubMatrix : public MatrixBase { - public: - // Initialize a SubMatrix from part of a matrix; this is - // a bit like A(b:c, d:e) in Matlab. - // This initializer is against the proper semantics of "const", since - // SubMatrix can change its contents. It would be hard to implement - // a "const-safe" version of this class. - SubMatrix(const MatrixBase& T, - const MatrixIndexT ro, // row offset, 0 < ro < NumRows() - const MatrixIndexT r, // number of rows, r > 0 - const MatrixIndexT co, // column offset, 0 < co < NumCols() - const MatrixIndexT c); // number of columns, c > 0 - - // This initializer is mostly intended for use in CuMatrix and related - // classes. Be careful! - SubMatrix(Real *data, - MatrixIndexT num_rows, - MatrixIndexT num_cols, - MatrixIndexT stride); - - ~SubMatrix() {} - - /// This type of constructor is needed for Range() to work [in Matrix base - /// class]. Cannot make it explicit. - SubMatrix (const SubMatrix &other): - MatrixBase (other.data_, other.num_cols_, other.num_rows_, - other.stride_) {} - - private: - /// Disallow assignment. - SubMatrix &operator = (const SubMatrix &other); + public: + // Initialize a SubMatrix from part of a matrix; this is + // a bit like A(b:c, d:e) in Matlab. + // This initializer is against the proper semantics of "const", since + // SubMatrix can change its contents. It would be hard to implement + // a "const-safe" version of this class. + SubMatrix(const MatrixBase &T, + const MatrixIndexT ro, // row offset, 0 < ro < NumRows() + const MatrixIndexT r, // number of rows, r > 0 + const MatrixIndexT co, // column offset, 0 < co < NumCols() + const MatrixIndexT c); // number of columns, c > 0 + + // This initializer is mostly intended for use in CuMatrix and related + // classes. Be careful! + SubMatrix(Real *data, + MatrixIndexT num_rows, + MatrixIndexT num_cols, + MatrixIndexT stride); + + ~SubMatrix() {} + + /// This type of constructor is needed for Range() to work [in Matrix base + /// class]. Cannot make it explicit. + SubMatrix(const SubMatrix &other) + : MatrixBase( + other.data_, other.num_cols_, other.num_rows_, other.stride_) {} + + private: + /// Disallow assignment. + SubMatrix &operator=(const SubMatrix &other); }; /// @} End of "addtogroup matrix_funcs_io". @@ -794,25 +840,33 @@ Real TraceMatMatMatMat(const MatrixBase &A, MatrixTransposeType transA, /// the same as U->NumCols(), and we sort s from greatest to least absolute /// value (if sort_on_absolute_value == true) or greatest to least value /// otherwise, moving the columns of U, if it exists, and the rows of Vt, if it -/// exists, around in the same way. Note: the "absolute value" part won't matter +/// exists, around in the same way. Note: the "absolute value" part won't +matter /// if this is an actual SVD, since singular values are non-negative. template void SortSvd(VectorBase *s, MatrixBase *U, MatrixBase* Vt = NULL, bool sort_on_absolute_value = true); -/// Creates the eigenvalue matrix D that is part of the decomposition used Matrix::Eig. +/// Creates the eigenvalue matrix D that is part of the decomposition used +Matrix::Eig. /// D will be block-diagonal with blocks of size 1 (for real eigenvalues) or 2x2 -/// for complex pairs. If a complex pair is lambda +- i*mu, D will have a corresponding +/// for complex pairs. If a complex pair is lambda +- i*mu, D will have a +corresponding /// 2x2 block [lambda, mu; -mu, lambda]. -/// This function will throw if any complex eigenvalues are not in complex conjugate +/// This function will throw if any complex eigenvalues are not in complex +conjugate /// pairs (or the members of such pairs are not consecutively numbered). template -void CreateEigenvalueMatrix(const VectorBase &real, const VectorBase &imag, +void CreateEigenvalueMatrix(const VectorBase &real, const VectorBase +&imag, MatrixBase *D); -/// The following function is used in Matrix::Power, and separately tested, so we -/// declare it here mainly for the testing code to see. It takes a complex value to -/// a power using a method that will work for noninteger powers (but will fail if the +/// The following function is used in Matrix::Power, and separately tested, so +we +/// declare it here mainly for the testing code to see. It takes a complex +value to +/// a power using a method that will work for noninteger powers (but will fail +if the /// complex value is real and negative). template bool AttemptComplexPower(Real *x_re, Real *x_im, Real power); @@ -823,19 +877,19 @@ bool AttemptComplexPower(Real *x_re, Real *x_im, Real power); /// \addtogroup matrix_funcs_io /// @{ -template -std::ostream & operator << (std::ostream & Out, const MatrixBase & M); +template +std::ostream &operator<<(std::ostream &Out, const MatrixBase &M); -template -std::istream & operator >> (std::istream & In, MatrixBase & M); +template +std::istream &operator>>(std::istream &In, MatrixBase &M); // The Matrix read allows resizing, so we override the MatrixBase one. -template -std::istream & operator >> (std::istream & In, Matrix & M); +template +std::istream &operator>>(std::istream &In, Matrix &M); -template +template bool SameDim(const MatrixBase &M, const MatrixBase &N) { - return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols()); + return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols()); } /// @} end of \addtogroup matrix_funcs_io @@ -844,7 +898,6 @@ bool SameDim(const MatrixBase &M, const MatrixBase &N) { } // namespace kaldi - // we need to include the implementation and some // template specializations. #include "matrix/kaldi-matrix-inl.h" diff --git a/runtime/engine/common/matrix/kaldi-vector-inl.h b/runtime/engine/common/matrix/kaldi-vector-inl.h index 826202764..b3075e590 100644 --- a/runtime/engine/common/matrix/kaldi-vector-inl.h +++ b/runtime/engine/common/matrix/kaldi-vector-inl.h @@ -26,32 +26,33 @@ namespace kaldi { -template -std::ostream & operator << (std::ostream &os, const VectorBase &rv) { - rv.Write(os, false); - return os; +template +std::ostream &operator<<(std::ostream &os, const VectorBase &rv) { + rv.Write(os, false); + return os; } -template -std::istream &operator >> (std::istream &is, VectorBase &rv) { - rv.Read(is, false); - return is; +template +std::istream &operator>>(std::istream &is, VectorBase &rv) { + rv.Read(is, false); + return is; } -template -std::istream &operator >> (std::istream &is, Vector &rv) { - rv.Read(is, false); - return is; +template +std::istream &operator>>(std::istream &is, Vector &rv) { + rv.Read(is, false); + return is; } -//template<> -//template<> -//void VectorBase::AddVec(const float alpha, const VectorBase &rv); +// template<> +// template<> +// void VectorBase::AddVec(const float alpha, const VectorBase +// &rv); -//template<> -//template<> -//void VectorBase::AddVec(const double alpha, - //const VectorBase &rv); +// template<> +// template<> +// void VectorBase::AddVec(const double alpha, +// const VectorBase &rv); } // namespace kaldi diff --git a/runtime/engine/common/matrix/kaldi-vector.cc b/runtime/engine/common/matrix/kaldi-vector.cc index 9f2bd08e6..90817f0d4 100644 --- a/runtime/engine/common/matrix/kaldi-vector.cc +++ b/runtime/engine/common/matrix/kaldi-vector.cc @@ -23,80 +23,85 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. +#include "matrix/kaldi-vector.h" #include #include -#include "matrix/kaldi-vector.h" #include "matrix/kaldi-matrix.h" namespace kaldi { -template +template inline void Vector::Init(const MatrixIndexT dim) { - KALDI_ASSERT(dim >= 0); - if (dim == 0) { - this->dim_ = 0; - this->data_ = NULL; - return; - } - MatrixIndexT size; - void *data; - void *free_data; + KALDI_ASSERT(dim >= 0); + if (dim == 0) { + this->dim_ = 0; + this->data_ = NULL; + return; + } + MatrixIndexT size; + void *data; + void *free_data; - size = dim * sizeof(Real); + size = dim * sizeof(Real); - if ((data = KALDI_MEMALIGN(16, size, &free_data)) != NULL) { - this->data_ = static_cast (data); - this->dim_ = dim; - } else { - throw std::bad_alloc(); - } + if ((data = KALDI_MEMALIGN(16, size, &free_data)) != NULL) { + this->data_ = static_cast(data); + this->dim_ = dim; + } else { + throw std::bad_alloc(); + } } -template -void Vector::Resize(const MatrixIndexT dim, MatrixResizeType resize_type) { - - // the next block uses recursion to handle what we have to do if - // resize_type == kCopyData. - if (resize_type == kCopyData) { - if (this->data_ == NULL || dim == 0) resize_type = kSetZero; // nothing to copy. - else if (this->dim_ == dim) { return; } // nothing to do. - else { - // set tmp to a vector of the desired size. - Vector tmp(dim, kUndefined); - if (dim > this->dim_) { - memcpy(tmp.data_, this->data_, sizeof(Real)*this->dim_); - memset(tmp.data_+this->dim_, 0, sizeof(Real)*(dim-this->dim_)); - } else { - memcpy(tmp.data_, this->data_, sizeof(Real)*dim); - } - tmp.Swap(this); - // and now let tmp go out of scope, deleting what was in *this. - return; +template +void Vector::Resize(const MatrixIndexT dim, + MatrixResizeType resize_type) { + // the next block uses recursion to handle what we have to do if + // resize_type == kCopyData. + if (resize_type == kCopyData) { + if (this->data_ == NULL || dim == 0) + resize_type = kSetZero; // nothing to copy. + else if (this->dim_ == dim) { + return; + } // nothing to do. + else { + // set tmp to a vector of the desired size. + Vector tmp(dim, kUndefined); + if (dim > this->dim_) { + memcpy(tmp.data_, this->data_, sizeof(Real) * this->dim_); + memset(tmp.data_ + this->dim_, + 0, + sizeof(Real) * (dim - this->dim_)); + } else { + memcpy(tmp.data_, this->data_, sizeof(Real) * dim); + } + tmp.Swap(this); + // and now let tmp go out of scope, deleting what was in *this. + return; + } } - } - // At this point, resize_type == kSetZero or kUndefined. + // At this point, resize_type == kSetZero or kUndefined. - if (this->data_ != NULL) { - if (this->dim_ == dim) { - if (resize_type == kSetZero) this->SetZero(); - return; - } else { - Destroy(); + if (this->data_ != NULL) { + if (this->dim_ == dim) { + if (resize_type == kSetZero) this->SetZero(); + return; + } else { + Destroy(); + } } - } - Init(dim); - if (resize_type == kSetZero) this->SetZero(); + Init(dim); + if (resize_type == kSetZero) this->SetZero(); } /// Copy data from another vector -template +template void VectorBase::CopyFromVec(const VectorBase &v) { - KALDI_ASSERT(Dim() == v.Dim()); - if (data_ != v.data_) { - std::memcpy(this->data_, v.data_, dim_ * sizeof(Real)); - } + KALDI_ASSERT(Dim() == v.Dim()); + if (data_ != v.data_) { + std::memcpy(this->data_, v.data_, dim_ * sizeof(Real)); + } } /* @@ -107,10 +112,14 @@ void VectorBase::CopyFromPacked(const PackedMatrix& M) { this->CopyFromVec(v); } // instantiate the template. -template void VectorBase::CopyFromPacked(const PackedMatrix &other); -template void VectorBase::CopyFromPacked(const PackedMatrix &other); -template void VectorBase::CopyFromPacked(const PackedMatrix &other); -template void VectorBase::CopyFromPacked(const PackedMatrix &other); +template void VectorBase::CopyFromPacked(const PackedMatrix +&other); +template void VectorBase::CopyFromPacked(const PackedMatrix +&other); +template void VectorBase::CopyFromPacked(const PackedMatrix +&other); +template void VectorBase::CopyFromPacked(const PackedMatrix +&other); /// Load data into the vector template @@ -119,50 +128,48 @@ void VectorBase::CopyFromPtr(const Real *data, MatrixIndexT sz) { std::memcpy(this->data_, data, Dim() * sizeof(Real)); }*/ -template -template +template +template void VectorBase::CopyFromVec(const VectorBase &other) { - KALDI_ASSERT(dim_ == other.Dim()); - Real * __restrict__ ptr = data_; - const OtherReal * __restrict__ other_ptr = other.Data(); - for (MatrixIndexT i = 0; i < dim_; i++) - ptr[i] = other_ptr[i]; + KALDI_ASSERT(dim_ == other.Dim()); + Real *__restrict__ ptr = data_; + const OtherReal *__restrict__ other_ptr = other.Data(); + for (MatrixIndexT i = 0; i < dim_; i++) ptr[i] = other_ptr[i]; } template void VectorBase::CopyFromVec(const VectorBase &other); template void VectorBase::CopyFromVec(const VectorBase &other); // Remove element from the vector. The vector is not reallocated -template +template void Vector::RemoveElement(MatrixIndexT i) { - KALDI_ASSERT(i < this->dim_ && "Access out of vector"); - for (MatrixIndexT j = i + 1; j < this->dim_; j++) - this->data_[j-1] = this->data_[j]; - this->dim_--; + KALDI_ASSERT(i < this->dim_ && "Access out of vector"); + for (MatrixIndexT j = i + 1; j < this->dim_; j++) + this->data_[j - 1] = this->data_[j]; + this->dim_--; } /// Deallocates memory and sets object to empty vector. -template +template void Vector::Destroy() { - /// we need to free the data block if it was defined - if (this->data_ != NULL) - KALDI_MEMALIGN_FREE(this->data_); - this->data_ = NULL; - this->dim_ = 0; + /// we need to free the data block if it was defined + if (this->data_ != NULL) KALDI_MEMALIGN_FREE(this->data_); + this->data_ = NULL; + this->dim_ = 0; } -template +template void VectorBase::SetZero() { - std::memset(data_, 0, dim_ * sizeof(Real)); + std::memset(data_, 0, dim_ * sizeof(Real)); } -template +template bool VectorBase::IsZero(Real cutoff) const { - Real abs_max = 0.0; - for (MatrixIndexT i = 0; i < Dim(); i++) - abs_max = std::max(std::abs(data_[i]), abs_max); - return (abs_max <= cutoff); + Real abs_max = 0.0; + for (MatrixIndexT i = 0; i < Dim(); i++) + abs_max = std::max(std::abs(data_[i]), abs_max); + return (abs_max <= cutoff); } /* @@ -201,104 +208,107 @@ MatrixIndexT VectorBase::RandCategorical() const { // returns exactly 1, or due to roundoff. }*/ -template +template void VectorBase::Set(Real f) { - // Why not use memset here? - // The basic unit of memset is a byte. - // If f != 0 and sizeof(Real) > 1, then we cannot use memset. - if (f == 0) { - this->SetZero(); // calls std::memset - } else { - for (MatrixIndexT i = 0; i < dim_; i++) { data_[i] = f; } - } + // Why not use memset here? + // The basic unit of memset is a byte. + // If f != 0 and sizeof(Real) > 1, then we cannot use memset. + if (f == 0) { + this->SetZero(); // calls std::memset + } else { + for (MatrixIndexT i = 0; i < dim_; i++) { + data_[i] = f; + } + } } -template +template void VectorBase::CopyRowsFromMat(const MatrixBase &mat) { - KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows()); + KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows()); - Real *inc_data = data_; - const MatrixIndexT cols = mat.NumCols(), rows = mat.NumRows(); + Real *inc_data = data_; + const MatrixIndexT cols = mat.NumCols(), rows = mat.NumRows(); - if (mat.Stride() == mat.NumCols()) { - memcpy(inc_data, mat.Data(), cols*rows*sizeof(Real)); - } else { - for (MatrixIndexT i = 0; i < rows; i++) { - // copy the data to the propper position - memcpy(inc_data, mat.RowData(i), cols * sizeof(Real)); - // set new copy position - inc_data += cols; + if (mat.Stride() == mat.NumCols()) { + memcpy(inc_data, mat.Data(), cols * rows * sizeof(Real)); + } else { + for (MatrixIndexT i = 0; i < rows; i++) { + // copy the data to the propper position + memcpy(inc_data, mat.RowData(i), cols * sizeof(Real)); + // set new copy position + inc_data += cols; + } } - } } -template -template +template +template void VectorBase::CopyRowsFromMat(const MatrixBase &mat) { - KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows()); - Real *vec_data = data_; - const MatrixIndexT cols = mat.NumCols(), - rows = mat.NumRows(); - - for (MatrixIndexT i = 0; i < rows; i++) { - const OtherReal *mat_row = mat.RowData(i); - for (MatrixIndexT j = 0; j < cols; j++) { - vec_data[j] = static_cast(mat_row[j]); + KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows()); + Real *vec_data = data_; + const MatrixIndexT cols = mat.NumCols(), rows = mat.NumRows(); + + for (MatrixIndexT i = 0; i < rows; i++) { + const OtherReal *mat_row = mat.RowData(i); + for (MatrixIndexT j = 0; j < cols; j++) { + vec_data[j] = static_cast(mat_row[j]); + } + vec_data += cols; } - vec_data += cols; - } } -template -void VectorBase::CopyRowsFromMat(const MatrixBase &mat); -template -void VectorBase::CopyRowsFromMat(const MatrixBase &mat); +template void VectorBase::CopyRowsFromMat(const MatrixBase &mat); +template void VectorBase::CopyRowsFromMat(const MatrixBase &mat); -template +template void VectorBase::CopyColsFromMat(const MatrixBase &mat) { - KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows()); + KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows()); - Real* inc_data = data_; - const MatrixIndexT cols = mat.NumCols(), rows = mat.NumRows(), stride = mat.Stride(); - const Real *mat_inc_data = mat.Data(); + Real *inc_data = data_; + const MatrixIndexT cols = mat.NumCols(), rows = mat.NumRows(), + stride = mat.Stride(); + const Real *mat_inc_data = mat.Data(); - for (MatrixIndexT i = 0; i < cols; i++) { - for (MatrixIndexT j = 0; j < rows; j++) { - inc_data[j] = mat_inc_data[j*stride]; + for (MatrixIndexT i = 0; i < cols; i++) { + for (MatrixIndexT j = 0; j < rows; j++) { + inc_data[j] = mat_inc_data[j * stride]; + } + mat_inc_data++; + inc_data += rows; } - mat_inc_data++; - inc_data += rows; - } } -template -void VectorBase::CopyRowFromMat(const MatrixBase &mat, MatrixIndexT row) { - KALDI_ASSERT(row < mat.NumRows()); - KALDI_ASSERT(dim_ == mat.NumCols()); - const Real *mat_row = mat.RowData(row); - memcpy(data_, mat_row, sizeof(Real)*dim_); +template +void VectorBase::CopyRowFromMat(const MatrixBase &mat, + MatrixIndexT row) { + KALDI_ASSERT(row < mat.NumRows()); + KALDI_ASSERT(dim_ == mat.NumCols()); + const Real *mat_row = mat.RowData(row); + memcpy(data_, mat_row, sizeof(Real) * dim_); } -template -template -void VectorBase::CopyRowFromMat(const MatrixBase &mat, MatrixIndexT row) { - KALDI_ASSERT(row < mat.NumRows()); - KALDI_ASSERT(dim_ == mat.NumCols()); - const OtherReal *mat_row = mat.RowData(row); - for (MatrixIndexT i = 0; i < dim_; i++) - data_[i] = static_cast(mat_row[i]); +template +template +void VectorBase::CopyRowFromMat(const MatrixBase &mat, + MatrixIndexT row) { + KALDI_ASSERT(row < mat.NumRows()); + KALDI_ASSERT(dim_ == mat.NumCols()); + const OtherReal *mat_row = mat.RowData(row); + for (MatrixIndexT i = 0; i < dim_; i++) + data_[i] = static_cast(mat_row[i]); } -template -void VectorBase::CopyRowFromMat(const MatrixBase &mat, MatrixIndexT row); -template -void VectorBase::CopyRowFromMat(const MatrixBase &mat, MatrixIndexT row); +template void VectorBase::CopyRowFromMat(const MatrixBase &mat, + MatrixIndexT row); +template void VectorBase::CopyRowFromMat(const MatrixBase &mat, + MatrixIndexT row); /* template template -void VectorBase::CopyRowFromSp(const SpMatrix &sp, MatrixIndexT row) { +void VectorBase::CopyRowFromSp(const SpMatrix &sp, MatrixIndexT +row) { KALDI_ASSERT(row < sp.NumRows()); KALDI_ASSERT(dim_ == sp.NumCols()); @@ -313,13 +323,17 @@ void VectorBase::CopyRowFromSp(const SpMatrix &sp, MatrixIndexT } template -void VectorBase::CopyRowFromSp(const SpMatrix &mat, MatrixIndexT row); +void VectorBase::CopyRowFromSp(const SpMatrix &mat, MatrixIndexT +row); template -void VectorBase::CopyRowFromSp(const SpMatrix &mat, MatrixIndexT row); +void VectorBase::CopyRowFromSp(const SpMatrix &mat, MatrixIndexT +row); template -void VectorBase::CopyRowFromSp(const SpMatrix &mat, MatrixIndexT row); +void VectorBase::CopyRowFromSp(const SpMatrix &mat, MatrixIndexT +row); template -void VectorBase::CopyRowFromSp(const SpMatrix &mat, MatrixIndexT row); +void VectorBase::CopyRowFromSp(const SpMatrix &mat, MatrixIndexT +row); // takes absolute value of the elements to a power. // Throws exception if could not (but only for power != 1 and power != 2). @@ -333,7 +347,8 @@ void VectorBase::ApplyPowAbs(Real power, bool include_sign) { data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * data_[i] * data_[i]; } else if (power == 0.5) { for (MatrixIndexT i = 0; i < dim_; i++) { - data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * std::sqrt(std::abs(data_[i])); + data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * +std::sqrt(std::abs(data_[i])); } } else if (power < 0.0) { for (MatrixIndexT i = 0; i < dim_; i++) { @@ -346,7 +361,8 @@ void VectorBase::ApplyPowAbs(Real power, bool include_sign) { } } else { for (MatrixIndexT i = 0; i < dim_; i++) { - data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * pow(std::abs(data_[i]), power); + data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * +pow(std::abs(data_[i]), power); if (data_[i] == HUGE_VAL) { // HUGE_VAL is what errno returns on error. KALDI_ERR << "Could not raise element " << i << "to power " << power << ": returned value = " << data_[i]; @@ -401,7 +417,8 @@ Real VectorBase::Norm(Real p) const { } template -bool VectorBase::ApproxEqual(const VectorBase &other, float tol) const { +bool VectorBase::ApproxEqual(const VectorBase &other, float tol) +const { if (dim_ != other.dim_) KALDI_ERR << "ApproxEqual: size mismatch " << dim_ << " vs. " << other.dim_; KALDI_ASSERT(tol >= 0.0); @@ -499,677 +516,716 @@ Real VectorBase::Min(MatrixIndexT *index_out) const { }*/ -template -template -void VectorBase::CopyColFromMat(const MatrixBase &mat, MatrixIndexT col) { - KALDI_ASSERT(col < mat.NumCols()); - KALDI_ASSERT(dim_ == mat.NumRows()); - for (MatrixIndexT i = 0; i < dim_; i++) - data_[i] = mat(i, col); - // can't do this very efficiently so don't really bother. could improve this though. +template +template +void VectorBase::CopyColFromMat(const MatrixBase &mat, + MatrixIndexT col) { + KALDI_ASSERT(col < mat.NumCols()); + KALDI_ASSERT(dim_ == mat.NumRows()); + for (MatrixIndexT i = 0; i < dim_; i++) data_[i] = mat(i, col); + // can't do this very efficiently so don't really bother. could improve this + // though. } // instantiate the template above. -template -void VectorBase::CopyColFromMat(const MatrixBase &mat, MatrixIndexT col); -template -void VectorBase::CopyColFromMat(const MatrixBase &mat, MatrixIndexT col); -template -void VectorBase::CopyColFromMat(const MatrixBase &mat, MatrixIndexT col); -template -void VectorBase::CopyColFromMat(const MatrixBase &mat, MatrixIndexT col); - -//template -//void VectorBase::CopyDiagFromMat(const MatrixBase &M) { - //KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols())); - //cblas_Xcopy(dim_, M.Data(), M.Stride() + 1, data_, 1); -//} - -//template -//void VectorBase::CopyDiagFromPacked(const PackedMatrix &M) { - //KALDI_ASSERT(dim_ == M.NumCols()); - //for (MatrixIndexT i = 0; i < dim_; i++) - //data_[i] = M(i, i); - //// could make this more efficient. -//} - -//template -//Real VectorBase::Sum() const { - //// Do a dot-product with a size-1 array with a stride of 0 to - //// implement sum. This allows us to access SIMD operations in a - //// cross-platform way via your BLAS library. - //Real one(1); - //return cblas_Xdot(dim_, data_, 1, &one, 0); -//} - -//template -//Real VectorBase::SumLog() const { - //double sum_log = 0.0; - //double prod = 1.0; - //for (MatrixIndexT i = 0; i < dim_; i++) { - //prod *= data_[i]; - //// Possible future work (arnab): change these magic values to pre-defined - //// constants - //if (prod < 1.0e-10 || prod > 1.0e+10) { - //sum_log += Log(prod); - //prod = 1.0; - //} - //} - //if (prod != 1.0) sum_log += Log(prod); - //return sum_log; -//} - -//template -//void VectorBase::AddRowSumMat(Real alpha, const MatrixBase &M, Real beta) { - //KALDI_ASSERT(dim_ == M.NumCols()); - //MatrixIndexT num_rows = M.NumRows(), stride = M.Stride(), dim = dim_; - //Real *data = data_; - - //// implement the function according to a dimension cutoff for computation efficiency - //if (num_rows <= 64) { - //cblas_Xscal(dim, beta, data, 1); - //const Real *m_data = M.Data(); - //for (MatrixIndexT i = 0; i < num_rows; i++, m_data += stride) - //cblas_Xaxpy(dim, alpha, m_data, 1, data, 1); - - //} else { - //Vector ones(M.NumRows()); - //ones.Set(1.0); - //this->AddMatVec(alpha, M, kTrans, ones, beta); - //} -//} - -//template -//void VectorBase::AddColSumMat(Real alpha, const MatrixBase &M, Real beta) { - //KALDI_ASSERT(dim_ == M.NumRows()); - //MatrixIndexT num_cols = M.NumCols(); - - //// implement the function according to a dimension cutoff for computation efficiency - //if (num_cols <= 64) { - //for (MatrixIndexT i = 0; i < dim_; i++) { - //double sum = 0.0; - //const Real *src = M.RowData(i); - //for (MatrixIndexT j = 0; j < num_cols; j++) - //sum += src[j]; - //data_[i] = alpha * sum + beta * data_[i]; - //} - //} else { - //Vector ones(M.NumCols()); - //ones.Set(1.0); - //this->AddMatVec(alpha, M, kNoTrans, ones, beta); - //} -//} - -//template -//Real VectorBase::LogSumExp(Real prune) const { - //Real sum; - //if (sizeof(sum) == 8) sum = kLogZeroDouble; - //else sum = kLogZeroFloat; - //Real max_elem = Max(), cutoff; - //if (sizeof(Real) == 4) cutoff = max_elem + kMinLogDiffFloat; - //else cutoff = max_elem + kMinLogDiffDouble; - //if (prune > 0.0 && max_elem - prune > cutoff) // explicit pruning... - //cutoff = max_elem - prune; - - //double sum_relto_max_elem = 0.0; - - //for (MatrixIndexT i = 0; i < dim_; i++) { - //BaseFloat f = data_[i]; - //if (f >= cutoff) - //sum_relto_max_elem += Exp(f - max_elem); - //} - //return max_elem + Log(sum_relto_max_elem); -//} - -//template -//void VectorBase::InvertElements() { - //for (MatrixIndexT i = 0; i < dim_; i++) { - //data_[i] = static_cast(1 / data_[i]); - //} -//} - -//template -//void VectorBase::ApplyLog() { - //for (MatrixIndexT i = 0; i < dim_; i++) { - //if (data_[i] < 0.0) - //KALDI_ERR << "Trying to take log of a negative number."; - //data_[i] = Log(data_[i]); - //} -//} - -//template -//void VectorBase::ApplyLogAndCopy(const VectorBase &v) { - //KALDI_ASSERT(dim_ == v.Dim()); - //for (MatrixIndexT i = 0; i < dim_; i++) { - //data_[i] = Log(v(i)); - //} -//} - -//template -//void VectorBase::ApplyExp() { - //for (MatrixIndexT i = 0; i < dim_; i++) { - //data_[i] = Exp(data_[i]); - //} -//} - -//template -//void VectorBase::ApplyAbs() { - //for (MatrixIndexT i = 0; i < dim_; i++) { data_[i] = std::abs(data_[i]); } -//} - -//template -//void VectorBase::Floor(const VectorBase &v, Real floor_val, MatrixIndexT *floored_count) { - //KALDI_ASSERT(dim_ == v.dim_); - //if (floored_count == nullptr) { - //for (MatrixIndexT i = 0; i < dim_; i++) { - //data_[i] = std::max(v.data_[i], floor_val); - //} - //} else { - //MatrixIndexT num_floored = 0; - //for (MatrixIndexT i = 0; i < dim_; i++) { - //if (v.data_[i] < floor_val) { - //data_[i] = floor_val; - //num_floored++; - //} else { - //data_[i] = v.data_[i]; - //} - //} - //*floored_count = num_floored; - //} -//} - -//template -//void VectorBase::Ceiling(const VectorBase &v, Real ceil_val, MatrixIndexT *ceiled_count) { - //KALDI_ASSERT(dim_ == v.dim_); - //if (ceiled_count == nullptr) { - //for (MatrixIndexT i = 0; i < dim_; i++) { - //data_[i] = std::min(v.data_[i], ceil_val); - //} - //} else { - //MatrixIndexT num_changed = 0; - //for (MatrixIndexT i = 0; i < dim_; i++) { - //if (v.data_[i] > ceil_val) { - //data_[i] = ceil_val; - //num_changed++; - //} else { - //data_[i] = v.data_[i]; - //} - //} - //*ceiled_count = num_changed; - //} -//} - -//template -//MatrixIndexT VectorBase::ApplyFloor(const VectorBase &floor_vec) { - //KALDI_ASSERT(floor_vec.Dim() == dim_); - //MatrixIndexT num_floored = 0; - //for (MatrixIndexT i = 0; i < dim_; i++) { - //if (data_[i] < floor_vec(i)) { - //data_[i] = floor_vec(i); - //num_floored++; - //} - //} - //return num_floored; -//} - -//template -//Real VectorBase::ApplySoftMax() { - //Real max = this->Max(), sum = 0.0; - //for (MatrixIndexT i = 0; i < dim_; i++) { - //sum += (data_[i] = Exp(data_[i] - max)); - //} - //this->Scale(1.0 / sum); - //return max + Log(sum); -//} - -//template -//Real VectorBase::ApplyLogSoftMax() { - //Real max = this->Max(), sum = 0.0; - //for (MatrixIndexT i = 0; i < dim_; i++) { - //sum += Exp((data_[i] -= max)); - //} - //sum = Log(sum); - //this->Add(-1.0 * sum); - //return max + sum; +template void VectorBase::CopyColFromMat(const MatrixBase &mat, + MatrixIndexT col); +template void VectorBase::CopyColFromMat(const MatrixBase &mat, + MatrixIndexT col); +template void VectorBase::CopyColFromMat(const MatrixBase &mat, + MatrixIndexT col); +template void VectorBase::CopyColFromMat(const MatrixBase &mat, + MatrixIndexT col); + +// template +// void VectorBase::CopyDiagFromMat(const MatrixBase &M) { +// KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols())); +// cblas_Xcopy(dim_, M.Data(), M.Stride() + 1, data_, 1); +//} + +// template +// void VectorBase::CopyDiagFromPacked(const PackedMatrix &M) { +// KALDI_ASSERT(dim_ == M.NumCols()); +// for (MatrixIndexT i = 0; i < dim_; i++) +// data_[i] = M(i, i); +//// could make this more efficient. +//} + +// template +// Real VectorBase::Sum() const { +//// Do a dot-product with a size-1 array with a stride of 0 to +//// implement sum. This allows us to access SIMD operations in a +//// cross-platform way via your BLAS library. +// Real one(1); +// return cblas_Xdot(dim_, data_, 1, &one, 0); +//} + +// template +// Real VectorBase::SumLog() const { +// double sum_log = 0.0; +// double prod = 1.0; +// for (MatrixIndexT i = 0; i < dim_; i++) { +// prod *= data_[i]; +//// Possible future work (arnab): change these magic values to pre-defined +//// constants +// if (prod < 1.0e-10 || prod > 1.0e+10) { +// sum_log += Log(prod); +// prod = 1.0; +//} +//} +// if (prod != 1.0) sum_log += Log(prod); +// return sum_log; +//} + +// template +// void VectorBase::AddRowSumMat(Real alpha, const MatrixBase &M, +// Real beta) { +// KALDI_ASSERT(dim_ == M.NumCols()); +// MatrixIndexT num_rows = M.NumRows(), stride = M.Stride(), dim = dim_; +// Real *data = data_; + +//// implement the function according to a dimension cutoff for computation +///efficiency +// if (num_rows <= 64) { +// cblas_Xscal(dim, beta, data, 1); +// const Real *m_data = M.Data(); +// for (MatrixIndexT i = 0; i < num_rows; i++, m_data += stride) +// cblas_Xaxpy(dim, alpha, m_data, 1, data, 1); + +//} else { +// Vector ones(M.NumRows()); +// ones.Set(1.0); +// this->AddMatVec(alpha, M, kTrans, ones, beta); +//} +//} + +// template +// void VectorBase::AddColSumMat(Real alpha, const MatrixBase &M, +// Real beta) { +// KALDI_ASSERT(dim_ == M.NumRows()); +// MatrixIndexT num_cols = M.NumCols(); + +//// implement the function according to a dimension cutoff for computation +///efficiency +// if (num_cols <= 64) { +// for (MatrixIndexT i = 0; i < dim_; i++) { +// double sum = 0.0; +// const Real *src = M.RowData(i); +// for (MatrixIndexT j = 0; j < num_cols; j++) +// sum += src[j]; +// data_[i] = alpha * sum + beta * data_[i]; +//} +//} else { +// Vector ones(M.NumCols()); +// ones.Set(1.0); +// this->AddMatVec(alpha, M, kNoTrans, ones, beta); +//} +//} + +// template +// Real VectorBase::LogSumExp(Real prune) const { +// Real sum; +// if (sizeof(sum) == 8) sum = kLogZeroDouble; +// else sum = kLogZeroFloat; +// Real max_elem = Max(), cutoff; +// if (sizeof(Real) == 4) cutoff = max_elem + kMinLogDiffFloat; +// else cutoff = max_elem + kMinLogDiffDouble; +// if (prune > 0.0 && max_elem - prune > cutoff) // explicit pruning... +// cutoff = max_elem - prune; + +// double sum_relto_max_elem = 0.0; + +// for (MatrixIndexT i = 0; i < dim_; i++) { +// BaseFloat f = data_[i]; +// if (f >= cutoff) +// sum_relto_max_elem += Exp(f - max_elem); +//} +// return max_elem + Log(sum_relto_max_elem); +//} + +// template +// void VectorBase::InvertElements() { +// for (MatrixIndexT i = 0; i < dim_; i++) { +// data_[i] = static_cast(1 / data_[i]); +//} +//} + +// template +// void VectorBase::ApplyLog() { +// for (MatrixIndexT i = 0; i < dim_; i++) { +// if (data_[i] < 0.0) +// KALDI_ERR << "Trying to take log of a negative number."; +// data_[i] = Log(data_[i]); +//} +//} + +// template +// void VectorBase::ApplyLogAndCopy(const VectorBase &v) { +// KALDI_ASSERT(dim_ == v.Dim()); +// for (MatrixIndexT i = 0; i < dim_; i++) { +// data_[i] = Log(v(i)); +//} +//} + +// template +// void VectorBase::ApplyExp() { +// for (MatrixIndexT i = 0; i < dim_; i++) { +// data_[i] = Exp(data_[i]); +//} +//} + +// template +// void VectorBase::ApplyAbs() { +// for (MatrixIndexT i = 0; i < dim_; i++) { data_[i] = std::abs(data_[i]); } +//} + +// template +// void VectorBase::Floor(const VectorBase &v, Real floor_val, +// MatrixIndexT *floored_count) { +// KALDI_ASSERT(dim_ == v.dim_); +// if (floored_count == nullptr) { +// for (MatrixIndexT i = 0; i < dim_; i++) { +// data_[i] = std::max(v.data_[i], floor_val); +//} +//} else { +// MatrixIndexT num_floored = 0; +// for (MatrixIndexT i = 0; i < dim_; i++) { +// if (v.data_[i] < floor_val) { +// data_[i] = floor_val; +// num_floored++; +//} else { +// data_[i] = v.data_[i]; +//} +//} +//*floored_count = num_floored; +//} +//} + +// template +// void VectorBase::Ceiling(const VectorBase &v, Real ceil_val, +// MatrixIndexT *ceiled_count) { +// KALDI_ASSERT(dim_ == v.dim_); +// if (ceiled_count == nullptr) { +// for (MatrixIndexT i = 0; i < dim_; i++) { +// data_[i] = std::min(v.data_[i], ceil_val); +//} +//} else { +// MatrixIndexT num_changed = 0; +// for (MatrixIndexT i = 0; i < dim_; i++) { +// if (v.data_[i] > ceil_val) { +// data_[i] = ceil_val; +// num_changed++; +//} else { +// data_[i] = v.data_[i]; +//} +//} +//*ceiled_count = num_changed; +//} +//} + +// template +// MatrixIndexT VectorBase::ApplyFloor(const VectorBase &floor_vec) +// { +// KALDI_ASSERT(floor_vec.Dim() == dim_); +// MatrixIndexT num_floored = 0; +// for (MatrixIndexT i = 0; i < dim_; i++) { +// if (data_[i] < floor_vec(i)) { +// data_[i] = floor_vec(i); +// num_floored++; +//} +//} +// return num_floored; +//} + +// template +// Real VectorBase::ApplySoftMax() { +// Real max = this->Max(), sum = 0.0; +// for (MatrixIndexT i = 0; i < dim_; i++) { +// sum += (data_[i] = Exp(data_[i] - max)); +//} +// this->Scale(1.0 / sum); +// return max + Log(sum); +//} + +// template +// Real VectorBase::ApplyLogSoftMax() { +// Real max = this->Max(), sum = 0.0; +// for (MatrixIndexT i = 0; i < dim_; i++) { +// sum += Exp((data_[i] -= max)); +//} +// sum = Log(sum); +// this->Add(-1.0 * sum); +// return max + sum; //} //#ifdef HAVE_MKL -//template<> -//void VectorBase::Tanh(const VectorBase &src) { - //KALDI_ASSERT(dim_ == src.dim_); - //vsTanh(dim_, src.data_, data_); +// template<> +// void VectorBase::Tanh(const VectorBase &src) { +// KALDI_ASSERT(dim_ == src.dim_); +// vsTanh(dim_, src.data_, data_); //} -//template<> -//void VectorBase::Tanh(const VectorBase &src) { - //KALDI_ASSERT(dim_ == src.dim_); - //vdTanh(dim_, src.data_, data_); +// template<> +// void VectorBase::Tanh(const VectorBase &src) { +// KALDI_ASSERT(dim_ == src.dim_); +// vdTanh(dim_, src.data_, data_); //} //#else -//template -//void VectorBase::Tanh(const VectorBase &src) { - //KALDI_ASSERT(dim_ == src.dim_); - //for (MatrixIndexT i = 0; i < dim_; i++) { - //Real x = src.data_[i]; - //if (x > 0.0) { - //Real inv_expx = Exp(-x); - //x = -1.0 + 2.0 / (1.0 + inv_expx * inv_expx); - //} else { - //Real expx = Exp(x); - //x = 1.0 - 2.0 / (1.0 + expx * expx); - //} - //data_[i] = x; - //} +// template +// void VectorBase::Tanh(const VectorBase &src) { +// KALDI_ASSERT(dim_ == src.dim_); +// for (MatrixIndexT i = 0; i < dim_; i++) { +// Real x = src.data_[i]; +// if (x > 0.0) { +// Real inv_expx = Exp(-x); +// x = -1.0 + 2.0 / (1.0 + inv_expx * inv_expx); +//} else { +// Real expx = Exp(x); +// x = 1.0 - 2.0 / (1.0 + expx * expx); +//} +// data_[i] = x; +//} //} //#endif //#ifdef HAVE_MKL //// Implementing sigmoid based on tanh. -//template<> -//void VectorBase::Sigmoid(const VectorBase &src) { - //KALDI_ASSERT(dim_ == src.dim_); - //this->CopyFromVec(src); - //this->Scale(0.5); - //vsTanh(dim_, data_, data_); - //this->Add(1.0); - //this->Scale(0.5); -//} -//template<> -//void VectorBase::Sigmoid(const VectorBase &src) { - //KALDI_ASSERT(dim_ == src.dim_); - //this->CopyFromVec(src); - //this->Scale(0.5); - //vdTanh(dim_, data_, data_); - //this->Add(1.0); - //this->Scale(0.5); +// template<> +// void VectorBase::Sigmoid(const VectorBase &src) { +// KALDI_ASSERT(dim_ == src.dim_); +// this->CopyFromVec(src); +// this->Scale(0.5); +// vsTanh(dim_, data_, data_); +// this->Add(1.0); +// this->Scale(0.5); +//} +// template<> +// void VectorBase::Sigmoid(const VectorBase &src) { +// KALDI_ASSERT(dim_ == src.dim_); +// this->CopyFromVec(src); +// this->Scale(0.5); +// vdTanh(dim_, data_, data_); +// this->Add(1.0); +// this->Scale(0.5); //} //#else -//template -//void VectorBase::Sigmoid(const VectorBase &src) { - //KALDI_ASSERT(dim_ == src.dim_); - //for (MatrixIndexT i = 0; i < dim_; i++) { - //Real x = src.data_[i]; - //// We aim to avoid floating-point overflow here. - //if (x > 0.0) { - //x = 1.0 / (1.0 + Exp(-x)); - //} else { - //Real ex = Exp(x); - //x = ex / (ex + 1.0); - //} - //data_[i] = x; - //} +// template +// void VectorBase::Sigmoid(const VectorBase &src) { +// KALDI_ASSERT(dim_ == src.dim_); +// for (MatrixIndexT i = 0; i < dim_; i++) { +// Real x = src.data_[i]; +//// We aim to avoid floating-point overflow here. +// if (x > 0.0) { +// x = 1.0 / (1.0 + Exp(-x)); +//} else { +// Real ex = Exp(x); +// x = ex / (ex + 1.0); +//} +// data_[i] = x; +//} //} //#endif -//template -//void VectorBase::Add(Real c) { - //for (MatrixIndexT i = 0; i < dim_; i++) { - //data_[i] += c; - //} +// template +// void VectorBase::Add(Real c) { +// for (MatrixIndexT i = 0; i < dim_; i++) { +// data_[i] += c; +//} //} -//template -//void VectorBase::Scale(Real alpha) { - //cblas_Xscal(dim_, alpha, data_, 1); +// template +// void VectorBase::Scale(Real alpha) { +// cblas_Xscal(dim_, alpha, data_, 1); //} -//template -//void VectorBase::MulElements(const VectorBase &v) { - //KALDI_ASSERT(dim_ == v.dim_); - //for (MatrixIndexT i = 0; i < dim_; i++) { - //data_[i] *= v.data_[i]; - //} +// template +// void VectorBase::MulElements(const VectorBase &v) { +// KALDI_ASSERT(dim_ == v.dim_); +// for (MatrixIndexT i = 0; i < dim_; i++) { +// data_[i] *= v.data_[i]; +//} //} -//template // Set each element to y = (x == orig ? changed : x). -//void VectorBase::ReplaceValue(Real orig, Real changed) { - //Real *data = data_; - //for (MatrixIndexT i = 0; i < dim_; i++) - //if (data[i] == orig) data[i] = changed; +// template // Set each element to y = (x == orig ? changed : +// x). +// void VectorBase::ReplaceValue(Real orig, Real changed) { +// Real *data = data_; +// for (MatrixIndexT i = 0; i < dim_; i++) +// if (data[i] == orig) data[i] = changed; //} -//template -//template -//void VectorBase::MulElements(const VectorBase &v) { - //KALDI_ASSERT(dim_ == v.Dim()); - //const OtherReal *other_ptr = v.Data(); - //for (MatrixIndexT i = 0; i < dim_; i++) { - //data_[i] *= other_ptr[i]; - //} +// template +// template +// void VectorBase::MulElements(const VectorBase &v) { +// KALDI_ASSERT(dim_ == v.Dim()); +// const OtherReal *other_ptr = v.Data(); +// for (MatrixIndexT i = 0; i < dim_; i++) { +// data_[i] *= other_ptr[i]; +//} //} //// instantiate template. -//template -//void VectorBase::MulElements(const VectorBase &v); -//template -//void VectorBase::MulElements(const VectorBase &v); - - -//template -//void VectorBase::AddVecVec(Real alpha, const VectorBase &v, - //const VectorBase &r, Real beta) { - //KALDI_ASSERT(v.data_ != this->data_ && r.data_ != this->data_); - //// We pretend that v is a band-diagonal matrix. - //KALDI_ASSERT(dim_ == v.dim_ && dim_ == r.dim_); - //cblas_Xgbmv(kNoTrans, dim_, dim_, 0, 0, alpha, v.data_, 1, - //r.data_, 1, beta, this->data_, 1); +// template +// void VectorBase::MulElements(const VectorBase &v); +// template +// void VectorBase::MulElements(const VectorBase &v); + + +// template +// void VectorBase::AddVecVec(Real alpha, const VectorBase &v, +// const VectorBase &r, Real beta) { +// KALDI_ASSERT(v.data_ != this->data_ && r.data_ != this->data_); +//// We pretend that v is a band-diagonal matrix. +// KALDI_ASSERT(dim_ == v.dim_ && dim_ == r.dim_); +// cblas_Xgbmv(kNoTrans, dim_, dim_, 0, 0, alpha, v.data_, 1, +// r.data_, 1, beta, this->data_, 1); //} -//template -//void VectorBase::DivElements(const VectorBase &v) { - //KALDI_ASSERT(dim_ == v.dim_); - //for (MatrixIndexT i = 0; i < dim_; i++) { - //data_[i] /= v.data_[i]; - //} +// template +// void VectorBase::DivElements(const VectorBase &v) { +// KALDI_ASSERT(dim_ == v.dim_); +// for (MatrixIndexT i = 0; i < dim_; i++) { +// data_[i] /= v.data_[i]; +//} //} -//template -//template -//void VectorBase::DivElements(const VectorBase &v) { - //KALDI_ASSERT(dim_ == v.Dim()); - //const OtherReal *other_ptr = v.Data(); - //for (MatrixIndexT i = 0; i < dim_; i++) { - //data_[i] /= other_ptr[i]; - //} +// template +// template +// void VectorBase::DivElements(const VectorBase &v) { +// KALDI_ASSERT(dim_ == v.Dim()); +// const OtherReal *other_ptr = v.Data(); +// for (MatrixIndexT i = 0; i < dim_; i++) { +// data_[i] /= other_ptr[i]; +//} //} //// instantiate template. -//template -//void VectorBase::DivElements(const VectorBase &v); -//template -//void VectorBase::DivElements(const VectorBase &v); - -//template -//void VectorBase::AddVecDivVec(Real alpha, const VectorBase &v, - //const VectorBase &rr, Real beta) { - //KALDI_ASSERT((dim_ == v.dim_ && dim_ == rr.dim_)); - //for (MatrixIndexT i = 0; i < dim_; i++) { - //data_[i] = alpha * v.data_[i]/rr.data_[i] + beta * data_[i] ; - //} -//} - -//template -//template -//void VectorBase::AddVec(const Real alpha, const VectorBase &v) { - //KALDI_ASSERT(dim_ == v.dim_); - //// remove __restrict__ if it causes compilation problems. - //Real *__restrict__ data = data_; - //OtherReal *__restrict__ other_data = v.data_; - //MatrixIndexT dim = dim_; - //if (alpha != 1.0) - //for (MatrixIndexT i = 0; i < dim; i++) - //data[i] += alpha * other_data[i]; - //else - //for (MatrixIndexT i = 0; i < dim; i++) - //data[i] += other_data[i]; -//} - -//template -//void VectorBase::AddVec(const float alpha, const VectorBase &v); -//template -//void VectorBase::AddVec(const double alpha, const VectorBase &v); - -//template -//template -//void VectorBase::AddVec2(const Real alpha, const VectorBase &v) { - //KALDI_ASSERT(dim_ == v.dim_); - //// remove __restrict__ if it causes compilation problems. - //Real *__restrict__ data = data_; - //OtherReal *__restrict__ other_data = v.data_; - //MatrixIndexT dim = dim_; - //if (alpha != 1.0) - //for (MatrixIndexT i = 0; i < dim; i++) - //data[i] += alpha * other_data[i] * other_data[i]; - //else - //for (MatrixIndexT i = 0; i < dim; i++) - //data[i] += other_data[i] * other_data[i]; -//} - -//template -//void VectorBase::AddVec2(const float alpha, const VectorBase &v); -//template -//void VectorBase::AddVec2(const double alpha, const VectorBase &v); +// template +// void VectorBase::DivElements(const VectorBase &v); +// template +// void VectorBase::DivElements(const VectorBase &v); + +// template +// void VectorBase::AddVecDivVec(Real alpha, const VectorBase &v, +// const VectorBase &rr, Real beta) { +// KALDI_ASSERT((dim_ == v.dim_ && dim_ == rr.dim_)); +// for (MatrixIndexT i = 0; i < dim_; i++) { +// data_[i] = alpha * v.data_[i]/rr.data_[i] + beta * data_[i] ; +//} +//} +// template +// template +// void VectorBase::AddVec(const Real alpha, const VectorBase +// &v) { +// KALDI_ASSERT(dim_ == v.dim_); +//// remove __restrict__ if it causes compilation problems. +// Real *__restrict__ data = data_; +// OtherReal *__restrict__ other_data = v.data_; +// MatrixIndexT dim = dim_; +// if (alpha != 1.0) +// for (MatrixIndexT i = 0; i < dim; i++) +// data[i] += alpha * other_data[i]; +// else +// for (MatrixIndexT i = 0; i < dim; i++) +// data[i] += other_data[i]; +//} -template -void VectorBase::Read(std::istream &is, bool binary) { - // In order to avoid rewriting this, we just declare a Vector and - // use it to read the data, then copy. - Vector tmp; - tmp.Read(is, binary); - if (tmp.Dim() != Dim()) - KALDI_ERR << "VectorBase::Read, size mismatch " - << Dim() << " vs. " << tmp.Dim(); - CopyFromVec(tmp); +// template +// void VectorBase::AddVec(const float alpha, const VectorBase +// &v); +// template +// void VectorBase::AddVec(const double alpha, const VectorBase +// &v); + +// template +// template +// void VectorBase::AddVec2(const Real alpha, const VectorBase +// &v) { +// KALDI_ASSERT(dim_ == v.dim_); +//// remove __restrict__ if it causes compilation problems. +// Real *__restrict__ data = data_; +// OtherReal *__restrict__ other_data = v.data_; +// MatrixIndexT dim = dim_; +// if (alpha != 1.0) +// for (MatrixIndexT i = 0; i < dim; i++) +// data[i] += alpha * other_data[i] * other_data[i]; +// else +// for (MatrixIndexT i = 0; i < dim; i++) +// data[i] += other_data[i] * other_data[i]; +//} + +// template +// void VectorBase::AddVec2(const float alpha, const VectorBase +// &v); +// template +// void VectorBase::AddVec2(const double alpha, const VectorBase +// &v); + + +template +void VectorBase::Read(std::istream &is, bool binary) { + // In order to avoid rewriting this, we just declare a Vector and + // use it to read the data, then copy. + Vector tmp; + tmp.Read(is, binary); + if (tmp.Dim() != Dim()) + KALDI_ERR << "VectorBase::Read, size mismatch " << Dim() + << " vs. " << tmp.Dim(); + CopyFromVec(tmp); } -template -void Vector::Read(std::istream &is, bool binary) { - std::ostringstream specific_error; - MatrixIndexT pos_at_start = is.tellg(); - - if (binary) { - int peekval = Peek(is, binary); - const char *my_token = (sizeof(Real) == 4 ? "FV" : "DV"); - char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F'); - if (peekval == other_token_start) { // need to instantiate the other type to read it. - typedef typename OtherReal::Real OtherType; // if Real == float, OtherType == double, and vice versa. - Vector other(this->Dim()); - other.Read(is, binary); // add is false at this point. - if (this->Dim() != other.Dim()) this->Resize(other.Dim()); - this->CopyFromVec(other); - return; - } - std::string token; - ReadToken(is, binary, &token); - if (token != my_token) { - if (token.length() > 20) token = token.substr(0, 17) + "..."; - specific_error << ": Expected token " << my_token << ", got " << token; - goto bad; - } - int32 size; - ReadBasicType(is, binary, &size); // throws on error. - if ((MatrixIndexT)size != this->Dim()) this->Resize(size); - if (size > 0) - is.read(reinterpret_cast(this->data_), sizeof(Real)*size); - if (is.fail()) { - specific_error << "Error reading vector data (binary mode); truncated " - "stream? (size = " << size << ")"; - goto bad; - } - return; - } else { // Text mode reading; format is " [ 1.1 2.0 3.4 ]\n" - std::string s; - is >> s; - // if ((s.compare("DV") == 0) || (s.compare("FV") == 0)) { // Back compatibility. - // is >> s; // get dimension - // is >> s; // get "[" - // } - if (is.fail()) { specific_error << "EOF while trying to read vector."; goto bad; } - if (s.compare("[]") == 0) { Resize(0); return; } // tolerate this variant. - if (s.compare("[")) { - if (s.length() > 20) s = s.substr(0, 17) + "..."; - specific_error << "Expected \"[\" but got " << s; - goto bad; - } - std::vector data; - while (1) { - int i = is.peek(); - if (i == '-' || (i >= '0' && i <= '9')) { // common cases first. - Real r; - is >> r; - if (is.fail()) { specific_error << "Failed to read number."; goto bad; } - if (! std::isspace(is.peek()) && is.peek() != ']') { - specific_error << "Expected whitespace after number."; goto bad; +template +void Vector::Read(std::istream &is, bool binary) { + std::ostringstream specific_error; + MatrixIndexT pos_at_start = is.tellg(); + + if (binary) { + int peekval = Peek(is, binary); + const char *my_token = (sizeof(Real) == 4 ? "FV" : "DV"); + char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F'); + if (peekval == other_token_start) { // need to instantiate the other + // type to read it. + typedef typename OtherReal::Real OtherType; // if Real == + // float, + // OtherType == + // double, and + // vice versa. + Vector other(this->Dim()); + other.Read(is, binary); // add is false at this point. + if (this->Dim() != other.Dim()) this->Resize(other.Dim()); + this->CopyFromVec(other); + return; + } + std::string token; + ReadToken(is, binary, &token); + if (token != my_token) { + if (token.length() > 20) token = token.substr(0, 17) + "..."; + specific_error << ": Expected token " << my_token << ", got " + << token; + goto bad; } - data.push_back(r); - // But don't eat whitespace... we want to check that it's not newlines - // which would be valid only for a matrix. - } else if (i == ' ' || i == '\t') { - is.get(); - } else if (i == ']') { - is.get(); // eat the ']' - this->Resize(data.size()); - for (size_t j = 0; j < data.size(); j++) - this->data_[j] = data[j]; - i = is.peek(); - if (static_cast(i) == '\r') { - is.get(); - is.get(); // get \r\n (must eat what we wrote) - } else if (static_cast(i) == '\n') { is.get(); } // get \n (must eat what we wrote) + int32 size; + ReadBasicType(is, binary, &size); // throws on error. + if ((MatrixIndexT)size != this->Dim()) this->Resize(size); + if (size > 0) + is.read(reinterpret_cast(this->data_), sizeof(Real) * size); if (is.fail()) { - KALDI_WARN << "After end of vector data, read error."; - // we got the data we needed, so just warn for this error. + specific_error + << "Error reading vector data (binary mode); truncated " + "stream? (size = " + << size << ")"; + goto bad; } - return; // success. - } else if (i == -1) { - specific_error << "EOF while reading vector data."; - goto bad; - } else if (i == '\n' || i == '\r') { - specific_error << "Newline found while reading vector (maybe it's a matrix?)"; - goto bad; - } else { - is >> s; // read string. - if (!KALDI_STRCASECMP(s.c_str(), "inf") || - !KALDI_STRCASECMP(s.c_str(), "infinity")) { - data.push_back(std::numeric_limits::infinity()); - KALDI_WARN << "Reading infinite value into vector."; - } else if (!KALDI_STRCASECMP(s.c_str(), "nan")) { - data.push_back(std::numeric_limits::quiet_NaN()); - KALDI_WARN << "Reading NaN value into vector."; - } else { - if (s.length() > 20) s = s.substr(0, 17) + "..."; - specific_error << "Expecting numeric vector data, got " << s; - goto bad; + return; + } else { // Text mode reading; format is " [ 1.1 2.0 3.4 ]\n" + std::string s; + is >> s; + // if ((s.compare("DV") == 0) || (s.compare("FV") == 0)) { // Back + // compatibility. + // is >> s; // get dimension + // is >> s; // get "[" + // } + if (is.fail()) { + specific_error << "EOF while trying to read vector."; + goto bad; + } + if (s.compare("[]") == 0) { + Resize(0); + return; + } // tolerate this variant. + if (s.compare("[")) { + if (s.length() > 20) s = s.substr(0, 17) + "..."; + specific_error << "Expected \"[\" but got " << s; + goto bad; + } + std::vector data; + while (1) { + int i = is.peek(); + if (i == '-' || (i >= '0' && i <= '9')) { // common cases first. + Real r; + is >> r; + if (is.fail()) { + specific_error << "Failed to read number."; + goto bad; + } + if (!std::isspace(is.peek()) && is.peek() != ']') { + specific_error << "Expected whitespace after number."; + goto bad; + } + data.push_back(r); + // But don't eat whitespace... we want to check that it's not + // newlines + // which would be valid only for a matrix. + } else if (i == ' ' || i == '\t') { + is.get(); + } else if (i == ']') { + is.get(); // eat the ']' + this->Resize(data.size()); + for (size_t j = 0; j < data.size(); j++) + this->data_[j] = data[j]; + i = is.peek(); + if (static_cast(i) == '\r') { + is.get(); + is.get(); // get \r\n (must eat what we wrote) + } else if (static_cast(i) == '\n') { + is.get(); + } // get \n (must eat what we wrote) + if (is.fail()) { + KALDI_WARN << "After end of vector data, read error."; + // we got the data we needed, so just warn for this error. + } + return; // success. + } else if (i == -1) { + specific_error << "EOF while reading vector data."; + goto bad; + } else if (i == '\n' || i == '\r') { + specific_error << "Newline found while reading vector (maybe " + "it's a matrix?)"; + goto bad; + } else { + is >> s; // read string. + if (!KALDI_STRCASECMP(s.c_str(), "inf") || + !KALDI_STRCASECMP(s.c_str(), "infinity")) { + data.push_back(std::numeric_limits::infinity()); + KALDI_WARN << "Reading infinite value into vector."; + } else if (!KALDI_STRCASECMP(s.c_str(), "nan")) { + data.push_back(std::numeric_limits::quiet_NaN()); + KALDI_WARN << "Reading NaN value into vector."; + } else { + if (s.length() > 20) s = s.substr(0, 17) + "..."; + specific_error << "Expecting numeric vector data, got " + << s; + goto bad; + } + } } - } } - } - // we never reach this line (the while loop returns directly). +// we never reach this line (the while loop returns directly). bad: - KALDI_ERR << "Failed to read vector from stream. " << specific_error.str() - << " File position at start is " - << pos_at_start<<", currently "< -void VectorBase::Write(std::ostream & os, bool binary) const { - if (!os.good()) { - KALDI_ERR << "Failed to write vector to stream: stream not good"; - } - if (binary) { - std::string my_token = (sizeof(Real) == 4 ? "FV" : "DV"); - WriteToken(os, binary, my_token); - - int32 size = Dim(); // make the size 32-bit on disk. - KALDI_ASSERT(Dim() == (MatrixIndexT) size); - WriteBasicType(os, binary, size); - os.write(reinterpret_cast(Data()), sizeof(Real) * size); - } else { - os << " [ "; - for (MatrixIndexT i = 0; i < Dim(); i++) - os << (*this)(i) << " "; - os << "]\n"; - } - if (!os.good()) - KALDI_ERR << "Failed to write vector to stream"; +template +void VectorBase::Write(std::ostream &os, bool binary) const { + if (!os.good()) { + KALDI_ERR << "Failed to write vector to stream: stream not good"; + } + if (binary) { + std::string my_token = (sizeof(Real) == 4 ? "FV" : "DV"); + WriteToken(os, binary, my_token); + + int32 size = Dim(); // make the size 32-bit on disk. + KALDI_ASSERT(Dim() == (MatrixIndexT)size); + WriteBasicType(os, binary, size); + os.write(reinterpret_cast(Data()), sizeof(Real) * size); + } else { + os << " [ "; + for (MatrixIndexT i = 0; i < Dim(); i++) os << (*this)(i) << " "; + os << "]\n"; + } + if (!os.good()) KALDI_ERR << "Failed to write vector to stream"; } -//template -//void VectorBase::AddVec2(const Real alpha, const VectorBase &v) { - //KALDI_ASSERT(dim_ == v.dim_); - //for (MatrixIndexT i = 0; i < dim_; i++) - //data_[i] += alpha * v.data_[i] * v.data_[i]; +// template +// void VectorBase::AddVec2(const Real alpha, const VectorBase &v) { +// KALDI_ASSERT(dim_ == v.dim_); +// for (MatrixIndexT i = 0; i < dim_; i++) +// data_[i] += alpha * v.data_[i] * v.data_[i]; //} //// this <-- beta*this + alpha*M*v. -//template -//void VectorBase::AddTpVec(const Real alpha, const TpMatrix &M, - //const MatrixTransposeType trans, - //const VectorBase &v, - //const Real beta) { - //KALDI_ASSERT(dim_ == v.dim_ && dim_ == M.NumRows()); - //if (beta == 0.0) { - //if (&v != this) CopyFromVec(v); - //MulTp(M, trans); - //if (alpha != 1.0) Scale(alpha); - //} else { - //Vector tmp(v); - //tmp.MulTp(M, trans); - //if (beta != 1.0) Scale(beta); // *this <-- beta * *this - //AddVec(alpha, tmp); // *this += alpha * M * v - //} -//} - -//template -//Real VecMatVec(const VectorBase &v1, const MatrixBase &M, - //const VectorBase &v2) { - //KALDI_ASSERT(v1.Dim() == M.NumRows() && v2.Dim() == M.NumCols()); - //Vector vtmp(M.NumRows()); - //vtmp.AddMatVec(1.0, M, kNoTrans, v2, 0.0); - //return VecVec(v1, vtmp); -//} - -//template -//float VecMatVec(const VectorBase &v1, const MatrixBase &M, - //const VectorBase &v2); -//template -//double VecMatVec(const VectorBase &v1, const MatrixBase &M, - //const VectorBase &v2); +// template +// void VectorBase::AddTpVec(const Real alpha, const TpMatrix &M, +// const MatrixTransposeType trans, +// const VectorBase &v, +// const Real beta) { +// KALDI_ASSERT(dim_ == v.dim_ && dim_ == M.NumRows()); +// if (beta == 0.0) { +// if (&v != this) CopyFromVec(v); +// MulTp(M, trans); +// if (alpha != 1.0) Scale(alpha); +//} else { +// Vector tmp(v); +// tmp.MulTp(M, trans); +// if (beta != 1.0) Scale(beta); // *this <-- beta * *this +// AddVec(alpha, tmp); // *this += alpha * M * v +//} +//} -template +// template +// Real VecMatVec(const VectorBase &v1, const MatrixBase &M, +// const VectorBase &v2) { +// KALDI_ASSERT(v1.Dim() == M.NumRows() && v2.Dim() == M.NumCols()); +// Vector vtmp(M.NumRows()); +// vtmp.AddMatVec(1.0, M, kNoTrans, v2, 0.0); +// return VecVec(v1, vtmp); +//} + +// template +// float VecMatVec(const VectorBase &v1, const MatrixBase &M, +// const VectorBase &v2); +// template +// double VecMatVec(const VectorBase &v1, const MatrixBase &M, +// const VectorBase &v2); + +template void Vector::Swap(Vector *other) { - std::swap(this->data_, other->data_); - std::swap(this->dim_, other->dim_); + std::swap(this->data_, other->data_); + std::swap(this->dim_, other->dim_); } -//template -//void VectorBase::AddDiagMat2( - //Real alpha, const MatrixBase &M, - //MatrixTransposeType trans, Real beta) { - //if (trans == kNoTrans) { - //KALDI_ASSERT(this->dim_ == M.NumRows()); - //MatrixIndexT rows = this->dim_, cols = M.NumCols(), - //mat_stride = M.Stride(); - //Real *data = this->data_; - //const Real *mat_data = M.Data(); - //for (MatrixIndexT i = 0; i < rows; i++, mat_data += mat_stride, data++) - //*data = beta * *data + alpha * cblas_Xdot(cols,mat_data,1,mat_data,1); - //} else { - //KALDI_ASSERT(this->dim_ == M.NumCols()); - //MatrixIndexT rows = M.NumRows(), cols = this->dim_, - //mat_stride = M.Stride(); - //Real *data = this->data_; - //const Real *mat_data = M.Data(); - //for (MatrixIndexT i = 0; i < cols; i++, mat_data++, data++) - //*data = beta * *data + alpha * cblas_Xdot(rows, mat_data, mat_stride, - //mat_data, mat_stride); - //} -//} - -//template -//void VectorBase::AddDiagMatMat( - //Real alpha, - //const MatrixBase &M, MatrixTransposeType transM, - //const MatrixBase &N, MatrixTransposeType transN, - //Real beta) { - //MatrixIndexT dim = this->dim_, - //M_col_dim = (transM == kTrans ? M.NumRows() : M.NumCols()), - //N_row_dim = (transN == kTrans ? N.NumCols() : N.NumRows()); - //KALDI_ASSERT(M_col_dim == N_row_dim); // this is the dimension we sum over - //MatrixIndexT M_row_stride = M.Stride(), M_col_stride = 1; - //if (transM == kTrans) std::swap(M_row_stride, M_col_stride); - //MatrixIndexT N_row_stride = N.Stride(), N_col_stride = 1; - //if (transN == kTrans) std::swap(N_row_stride, N_col_stride); - - //Real *data = this->data_; - //const Real *Mdata = M.Data(), *Ndata = N.Data(); - //for (MatrixIndexT i = 0; i < dim; i++, Mdata += M_row_stride, Ndata += N_col_stride, data++) { - //*data = beta * *data + alpha * cblas_Xdot(M_col_dim, Mdata, M_col_stride, Ndata, N_row_stride); - //} +// template +// void VectorBase::AddDiagMat2( +// Real alpha, const MatrixBase &M, +// MatrixTransposeType trans, Real beta) { +// if (trans == kNoTrans) { +// KALDI_ASSERT(this->dim_ == M.NumRows()); +// MatrixIndexT rows = this->dim_, cols = M.NumCols(), +// mat_stride = M.Stride(); +// Real *data = this->data_; +// const Real *mat_data = M.Data(); +// for (MatrixIndexT i = 0; i < rows; i++, mat_data += mat_stride, data++) +//*data = beta * *data + alpha * cblas_Xdot(cols,mat_data,1,mat_data,1); +//} else { +// KALDI_ASSERT(this->dim_ == M.NumCols()); +// MatrixIndexT rows = M.NumRows(), cols = this->dim_, +// mat_stride = M.Stride(); +// Real *data = this->data_; +// const Real *mat_data = M.Data(); +// for (MatrixIndexT i = 0; i < cols; i++, mat_data++, data++) +//*data = beta * *data + alpha * cblas_Xdot(rows, mat_data, mat_stride, +// mat_data, mat_stride); +//} +//} + +// template +// void VectorBase::AddDiagMatMat( +// Real alpha, +// const MatrixBase &M, MatrixTransposeType transM, +// const MatrixBase &N, MatrixTransposeType transN, +// Real beta) { +// MatrixIndexT dim = this->dim_, +// M_col_dim = (transM == kTrans ? M.NumRows() : M.NumCols()), +// N_row_dim = (transN == kTrans ? N.NumCols() : N.NumRows()); +// KALDI_ASSERT(M_col_dim == N_row_dim); // this is the dimension we sum over +// MatrixIndexT M_row_stride = M.Stride(), M_col_stride = 1; +// if (transM == kTrans) std::swap(M_row_stride, M_col_stride); +// MatrixIndexT N_row_stride = N.Stride(), N_col_stride = 1; +// if (transN == kTrans) std::swap(N_row_stride, N_col_stride); + +// Real *data = this->data_; +// const Real *Mdata = M.Data(), *Ndata = N.Data(); +// for (MatrixIndexT i = 0; i < dim; i++, Mdata += M_row_stride, Ndata += +// N_col_stride, data++) { +//*data = beta * *data + alpha * cblas_Xdot(M_col_dim, Mdata, M_col_stride, +//Ndata, N_row_stride); +//} //} diff --git a/runtime/engine/common/matrix/kaldi-vector.h b/runtime/engine/common/matrix/kaldi-vector.h index 5bcbeda90..461e026d3 100644 --- a/runtime/engine/common/matrix/kaldi-vector.h +++ b/runtime/engine/common/matrix/kaldi-vector.h @@ -37,265 +37,274 @@ namespace kaldi { /// Provides a vector abstraction class. /// This class provides a way to work with vectors in kaldi. /// It encapsulates basic operations and memory optimizations. -template +template class VectorBase { - public: - /// Set vector to all zeros. - void SetZero(); - - /// Returns true if matrix is all zeros. - bool IsZero(Real cutoff = 1.0e-06) const; // replace magic number - - /// Set all members of a vector to a specified value. - void Set(Real f); - - /// Returns the dimension of the vector. - inline MatrixIndexT Dim() const { return dim_; } - - /// Returns the size in memory of the vector, in bytes. - inline MatrixIndexT SizeInBytes() const { return (dim_*sizeof(Real)); } - - /// Returns a pointer to the start of the vector's data. - inline Real* Data() { return data_; } - - /// Returns a pointer to the start of the vector's data (const). - inline const Real* Data() const { return data_; } - - /// Indexing operator (const). - inline Real operator() (MatrixIndexT i) const { - KALDI_PARANOID_ASSERT(static_cast(i) < - static_cast(dim_)); - return *(data_ + i); - } - - /// Indexing operator (non-const). - inline Real & operator() (MatrixIndexT i) { - KALDI_PARANOID_ASSERT(static_cast(i) < - static_cast(dim_)); - return *(data_ + i); - } - - /** @brief Returns a sub-vector of a vector (a range of elements). - * @param o [in] Origin, 0 < o < Dim() - * @param l [in] Length 0 < l < Dim()-o - * @return A SubVector object that aliases the data of the Vector object. - * See @c SubVector class for details */ - SubVector Range(const MatrixIndexT o, const MatrixIndexT l) { - return SubVector(*this, o, l); - } - - /** @brief Returns a const sub-vector of a vector (a range of elements). - * @param o [in] Origin, 0 < o < Dim() - * @param l [in] Length 0 < l < Dim()-o - * @return A SubVector object that aliases the data of the Vector object. - * See @c SubVector class for details */ - const SubVector Range(const MatrixIndexT o, - const MatrixIndexT l) const { - return SubVector(*this, o, l); - } - - /// Copy data from another vector (must match own size). - void CopyFromVec(const VectorBase &v); - - /// Copy data from another vector of different type (double vs. float) - template - void CopyFromVec(const VectorBase &v); - - /// Performs a row stack of the matrix M - void CopyRowsFromMat(const MatrixBase &M); - template - void CopyRowsFromMat(const MatrixBase &M); - - /// Performs a column stack of the matrix M - void CopyColsFromMat(const MatrixBase &M); - - /// Extracts a row of the matrix M. Could also do this with - /// this->Copy(M[row]). - void CopyRowFromMat(const MatrixBase &M, MatrixIndexT row); - /// Extracts a row of the matrix M with type conversion. - template - void CopyRowFromMat(const MatrixBase &M, MatrixIndexT row); - - /// Extracts a column of the matrix M. - template - void CopyColFromMat(const MatrixBase &M , MatrixIndexT col); - - /// Reads from C++ stream (option to add to existing contents). - /// Throws exception on failure - void Read(std::istream &in, bool binary); - - /// Writes to C++ stream (option to write in binary). - void Write(std::ostream &Out, bool binary) const; - - friend class VectorBase; - friend class VectorBase; - protected: - /// Destructor; does not deallocate memory, this is handled by child classes. - /// This destructor is protected so this object can only be - /// deleted via a child. - ~VectorBase() {} - - /// Empty initializer, corresponds to vector of zero size. - explicit VectorBase(): data_(NULL), dim_(0) { - KALDI_ASSERT_IS_FLOATING_TYPE(Real); - } - - /// data memory area - Real* data_; - /// dimension of vector - MatrixIndexT dim_; - KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase); -}; // class VectorBase + public: + /// Set vector to all zeros. + void SetZero(); + + /// Returns true if matrix is all zeros. + bool IsZero(Real cutoff = 1.0e-06) const; // replace magic number + + /// Set all members of a vector to a specified value. + void Set(Real f); + + /// Returns the dimension of the vector. + inline MatrixIndexT Dim() const { return dim_; } + + /// Returns the size in memory of the vector, in bytes. + inline MatrixIndexT SizeInBytes() const { return (dim_ * sizeof(Real)); } + + /// Returns a pointer to the start of the vector's data. + inline Real *Data() { return data_; } + + /// Returns a pointer to the start of the vector's data (const). + inline const Real *Data() const { return data_; } + + /// Indexing operator (const). + inline Real operator()(MatrixIndexT i) const { + KALDI_PARANOID_ASSERT(static_cast(i) < + static_cast(dim_)); + return *(data_ + i); + } + + /// Indexing operator (non-const). + inline Real &operator()(MatrixIndexT i) { + KALDI_PARANOID_ASSERT(static_cast(i) < + static_cast(dim_)); + return *(data_ + i); + } + + /** @brief Returns a sub-vector of a vector (a range of elements). + * @param o [in] Origin, 0 < o < Dim() + * @param l [in] Length 0 < l < Dim()-o + * @return A SubVector object that aliases the data of the Vector object. + * See @c SubVector class for details */ + SubVector Range(const MatrixIndexT o, const MatrixIndexT l) { + return SubVector(*this, o, l); + } + + /** @brief Returns a const sub-vector of a vector (a range of elements). + * @param o [in] Origin, 0 < o < Dim() + * @param l [in] Length 0 < l < Dim()-o + * @return A SubVector object that aliases the data of the Vector object. + * See @c SubVector class for details */ + const SubVector Range(const MatrixIndexT o, + const MatrixIndexT l) const { + return SubVector(*this, o, l); + } + + /// Copy data from another vector (must match own size). + void CopyFromVec(const VectorBase &v); + + /// Copy data from another vector of different type (double vs. float) + template + void CopyFromVec(const VectorBase &v); + + /// Performs a row stack of the matrix M + void CopyRowsFromMat(const MatrixBase &M); + template + void CopyRowsFromMat(const MatrixBase &M); + + /// Performs a column stack of the matrix M + void CopyColsFromMat(const MatrixBase &M); + + /// Extracts a row of the matrix M. Could also do this with + /// this->Copy(M[row]). + void CopyRowFromMat(const MatrixBase &M, MatrixIndexT row); + /// Extracts a row of the matrix M with type conversion. + template + void CopyRowFromMat(const MatrixBase &M, MatrixIndexT row); + + /// Extracts a column of the matrix M. + template + void CopyColFromMat(const MatrixBase &M, MatrixIndexT col); + + /// Reads from C++ stream (option to add to existing contents). + /// Throws exception on failure + void Read(std::istream &in, bool binary); + + /// Writes to C++ stream (option to write in binary). + void Write(std::ostream &Out, bool binary) const; + + friend class VectorBase; + friend class VectorBase; + + protected: + /// Destructor; does not deallocate memory, this is handled by child + /// classes. + /// This destructor is protected so this object can only be + /// deleted via a child. + ~VectorBase() {} + + /// Empty initializer, corresponds to vector of zero size. + explicit VectorBase() : data_(NULL), dim_(0) { + KALDI_ASSERT_IS_FLOATING_TYPE(Real); + } + + /// data memory area + Real *data_; + /// dimension of vector + MatrixIndexT dim_; + KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase); +}; // class VectorBase /** @brief A class representing a vector. * * This class provides a way to work with vectors in kaldi. * It encapsulates basic operations and memory optimizations. */ -template -class Vector: public VectorBase { - public: - /// Constructor that takes no arguments. Initializes to empty. - Vector(): VectorBase() {} - - /// Constructor with specific size. Sets to all-zero by default - /// if set_zero == false, memory contents are undefined. - explicit Vector(const MatrixIndexT s, - MatrixResizeType resize_type = kSetZero) - : VectorBase() { Resize(s, resize_type); } - - /// Copy constructor from CUDA vector - /// This is defined in ../cudamatrix/cu-vector.h - //template - //explicit Vector(const CuVectorBase &cu); - - /// Copy constructor. The need for this is controversial. - Vector(const Vector &v) : VectorBase() { // (cannot be explicit) - Resize(v.Dim(), kUndefined); - this->CopyFromVec(v); - } - - /// Copy-constructor from base-class, needed to copy from SubVector. - explicit Vector(const VectorBase &v) : VectorBase() { - Resize(v.Dim(), kUndefined); - this->CopyFromVec(v); - } - - /// Type conversion constructor. - template - explicit Vector(const VectorBase &v): VectorBase() { - Resize(v.Dim(), kUndefined); - this->CopyFromVec(v); - } - -// Took this out since it is unsafe : Arnab -// /// Constructor from a pointer and a size; copies the data to a location -// /// it owns. -// Vector(const Real* Data, const MatrixIndexT s): VectorBase() { -// Resize(s); - // CopyFromPtr(Data, s); -// } - - - /// Swaps the contents of *this and *other. Shallow swap. - void Swap(Vector *other); - - /// Destructor. Deallocates memory. - ~Vector() { Destroy(); } - - /// Read function using C++ streams. Can also add to existing contents - /// of matrix. - void Read(std::istream &in, bool binary); - - /// Set vector to a specified size (can be zero). - /// The value of the new data depends on resize_type: - /// -if kSetZero, the new data will be zero - /// -if kUndefined, the new data will be undefined - /// -if kCopyData, the new data will be the same as the old data in any - /// shared positions, and zero elsewhere. - /// This function takes time proportional to the number of data elements. - void Resize(MatrixIndexT length, MatrixResizeType resize_type = kSetZero); - - /// Remove one element and shifts later elements down. - void RemoveElement(MatrixIndexT i); - - /// Assignment operator. - Vector &operator = (const Vector &other) { - Resize(other.Dim(), kUndefined); - this->CopyFromVec(other); - return *this; - } - - /// Assignment operator that takes VectorBase. - Vector &operator = (const VectorBase &other) { - Resize(other.Dim(), kUndefined); - this->CopyFromVec(other); - return *this; - } - private: - /// Init assumes the current contents of the class are invalid (i.e. junk or - /// has already been freed), and it sets the vector to newly allocated memory - /// with the specified dimension. dim == 0 is acceptable. The memory contents - /// pointed to by data_ will be undefined. - void Init(const MatrixIndexT dim); - - /// Destroy function, called internally. - void Destroy(); - +template +class Vector : public VectorBase { + public: + /// Constructor that takes no arguments. Initializes to empty. + Vector() : VectorBase() {} + + /// Constructor with specific size. Sets to all-zero by default + /// if set_zero == false, memory contents are undefined. + explicit Vector(const MatrixIndexT s, + MatrixResizeType resize_type = kSetZero) + : VectorBase() { + Resize(s, resize_type); + } + + /// Copy constructor from CUDA vector + /// This is defined in ../cudamatrix/cu-vector.h + // template + // explicit Vector(const CuVectorBase &cu); + + /// Copy constructor. The need for this is controversial. + Vector(const Vector &v) + : VectorBase() { // (cannot be explicit) + Resize(v.Dim(), kUndefined); + this->CopyFromVec(v); + } + + /// Copy-constructor from base-class, needed to copy from SubVector. + explicit Vector(const VectorBase &v) : VectorBase() { + Resize(v.Dim(), kUndefined); + this->CopyFromVec(v); + } + + /// Type conversion constructor. + template + explicit Vector(const VectorBase &v) : VectorBase() { + Resize(v.Dim(), kUndefined); + this->CopyFromVec(v); + } + + // Took this out since it is unsafe : Arnab + // /// Constructor from a pointer and a size; copies the data to a location + // /// it owns. + // Vector(const Real* Data, const MatrixIndexT s): VectorBase() { + // Resize(s); + // CopyFromPtr(Data, s); + // } + + + /// Swaps the contents of *this and *other. Shallow swap. + void Swap(Vector *other); + + /// Destructor. Deallocates memory. + ~Vector() { Destroy(); } + + /// Read function using C++ streams. Can also add to existing contents + /// of matrix. + void Read(std::istream &in, bool binary); + + /// Set vector to a specified size (can be zero). + /// The value of the new data depends on resize_type: + /// -if kSetZero, the new data will be zero + /// -if kUndefined, the new data will be undefined + /// -if kCopyData, the new data will be the same as the old data in any + /// shared positions, and zero elsewhere. + /// This function takes time proportional to the number of data elements. + void Resize(MatrixIndexT length, MatrixResizeType resize_type = kSetZero); + + /// Remove one element and shifts later elements down. + void RemoveElement(MatrixIndexT i); + + /// Assignment operator. + Vector &operator=(const Vector &other) { + Resize(other.Dim(), kUndefined); + this->CopyFromVec(other); + return *this; + } + + /// Assignment operator that takes VectorBase. + Vector &operator=(const VectorBase &other) { + Resize(other.Dim(), kUndefined); + this->CopyFromVec(other); + return *this; + } + + private: + /// Init assumes the current contents of the class are invalid (i.e. junk or + /// has already been freed), and it sets the vector to newly allocated + /// memory + /// with the specified dimension. dim == 0 is acceptable. The memory + /// contents + /// pointed to by data_ will be undefined. + void Init(const MatrixIndexT dim); + + /// Destroy function, called internally. + void Destroy(); }; /// Represents a non-allocating general vector which can be defined /// as a sub-vector of higher-level vector [or as the row of a matrix]. -template +template class SubVector : public VectorBase { - public: - /// Constructor from a Vector or SubVector. - /// SubVectors are not const-safe and it's very hard to make them - /// so for now we just give up. This function contains const_cast. - SubVector(const VectorBase &t, const MatrixIndexT origin, - const MatrixIndexT length) : VectorBase() { - // following assert equiv to origin>=0 && length>=0 && - // origin+length <= rt.dim_ - KALDI_ASSERT(static_cast(origin)+ - static_cast(length) <= - static_cast(t.Dim())); - VectorBase::data_ = const_cast (t.Data()+origin); - VectorBase::dim_ = length; - } - - /// This constructor initializes the vector to point at the contents - /// of this packed matrix (SpMatrix or TpMatrix). - // SubVector(const PackedMatrix &M) { - //VectorBase::data_ = const_cast (M.Data()); - //VectorBase::dim_ = (M.NumRows()*(M.NumRows()+1))/2; - //} - - /// Copy constructor - SubVector(const SubVector &other) : VectorBase () { - // this copy constructor needed for Range() to work in base class. - VectorBase::data_ = other.data_; - VectorBase::dim_ = other.dim_; - } - - /// Constructor from a pointer to memory and a length. Keeps a pointer - /// to the data but does not take ownership (will never delete). - /// Caution: this constructor enables you to evade const constraints. - SubVector(const Real *data, MatrixIndexT length) : VectorBase () { - VectorBase::data_ = const_cast(data); - VectorBase::dim_ = length; - } - - /// This operation does not preserve const-ness, so be careful. - SubVector(const MatrixBase &matrix, MatrixIndexT row) { - VectorBase::data_ = const_cast(matrix.RowData(row)); - VectorBase::dim_ = matrix.NumCols(); - } - - ~SubVector() {} ///< Destructor (does nothing; no pointers are owned here). - - private: - /// Disallow assignment operator. - SubVector & operator = (const SubVector &other) {} + public: + /// Constructor from a Vector or SubVector. + /// SubVectors are not const-safe and it's very hard to make them + /// so for now we just give up. This function contains const_cast. + SubVector(const VectorBase &t, + const MatrixIndexT origin, + const MatrixIndexT length) + : VectorBase() { + // following assert equiv to origin>=0 && length>=0 && + // origin+length <= rt.dim_ + KALDI_ASSERT(static_cast(origin) + + static_cast(length) <= + static_cast(t.Dim())); + VectorBase::data_ = const_cast(t.Data() + origin); + VectorBase::dim_ = length; + } + + /// This constructor initializes the vector to point at the contents + /// of this packed matrix (SpMatrix or TpMatrix). + // SubVector(const PackedMatrix &M) { + // VectorBase::data_ = const_cast (M.Data()); + // VectorBase::dim_ = (M.NumRows()*(M.NumRows()+1))/2; + //} + + /// Copy constructor + SubVector(const SubVector &other) : VectorBase() { + // this copy constructor needed for Range() to work in base class. + VectorBase::data_ = other.data_; + VectorBase::dim_ = other.dim_; + } + + /// Constructor from a pointer to memory and a length. Keeps a pointer + /// to the data but does not take ownership (will never delete). + /// Caution: this constructor enables you to evade const constraints. + SubVector(const Real *data, MatrixIndexT length) : VectorBase() { + VectorBase::data_ = const_cast(data); + VectorBase::dim_ = length; + } + + /// This operation does not preserve const-ness, so be careful. + SubVector(const MatrixBase &matrix, MatrixIndexT row) { + VectorBase::data_ = const_cast(matrix.RowData(row)); + VectorBase::dim_ = matrix.NumCols(); + } + + ~SubVector() {} ///< Destructor (does nothing; no pointers are owned here). + + private: + /// Disallow assignment operator. + SubVector &operator=(const SubVector &other) {} }; /// @} end of "addtogroup matrix_group" @@ -303,43 +312,41 @@ class SubVector : public VectorBase { /// @{ /// Output to a C++ stream. Non-binary by default (use Write for /// binary output). -template -std::ostream & operator << (std::ostream & out, const VectorBase & v); +template +std::ostream &operator<<(std::ostream &out, const VectorBase &v); /// Input from a C++ stream. Will automatically read text or /// binary data from the stream. -template -std::istream & operator >> (std::istream & in, VectorBase & v); +template +std::istream &operator>>(std::istream &in, VectorBase &v); /// Input from a C++ stream. Will automatically read text or /// binary data from the stream. -template -std::istream & operator >> (std::istream & in, Vector & v); +template +std::istream &operator>>(std::istream &in, Vector &v); /// @} end of \addtogroup matrix_funcs_io /// \addtogroup matrix_funcs_scalar /// @{ -//template -//bool ApproxEqual(const VectorBase &a, - //const VectorBase &b, Real tol = 0.01) { - //return a.ApproxEqual(b, tol); +// template +// bool ApproxEqual(const VectorBase &a, +// const VectorBase &b, Real tol = 0.01) { +// return a.ApproxEqual(b, tol); //} -//template -//inline void AssertEqual(VectorBase &a, VectorBase &b, - //float tol = 0.01) { - //KALDI_ASSERT(a.ApproxEqual(b, tol)); +// template +// inline void AssertEqual(VectorBase &a, VectorBase &b, +// float tol = 0.01) { +// KALDI_ASSERT(a.ApproxEqual(b, tol)); //} - } // namespace kaldi // we need to include the implementation #include "matrix/kaldi-vector-inl.h" - #endif // KALDI_MATRIX_KALDI_VECTOR_H_ diff --git a/runtime/engine/common/matrix/matrix-common.h b/runtime/engine/common/matrix/matrix-common.h index b7bdbbc8b..512beb204 100644 --- a/runtime/engine/common/matrix/matrix-common.h +++ b/runtime/engine/common/matrix/matrix-common.h @@ -27,52 +27,58 @@ namespace kaldi { // this enums equal to CblasTrans and CblasNoTrans constants from CBLAS library -// we are writing them as literals because we don't want to include here matrix/kaldi-blas.h, -// which puts many symbols into global scope (like "real") via the header f2c.h +// we are writing them as literals because we don't want to include here +// matrix/kaldi-blas.h, +// which puts many symbols into global scope (like "real") via the header f2c.h typedef enum { - kTrans = 112, // = CblasTrans - kNoTrans = 111 // = CblasNoTrans + kTrans = 112, // = CblasTrans + kNoTrans = 111 // = CblasNoTrans } MatrixTransposeType; -typedef enum { - kSetZero, - kUndefined, - kCopyData -} MatrixResizeType; +typedef enum { kSetZero, kUndefined, kCopyData } MatrixResizeType; typedef enum { - kDefaultStride, - kStrideEqualNumCols, + kDefaultStride, + kStrideEqualNumCols, } MatrixStrideType; typedef enum { - kTakeLower, - kTakeUpper, - kTakeMean, - kTakeMeanAndCheck + kTakeLower, + kTakeUpper, + kTakeMean, + kTakeMeanAndCheck } SpCopyType; -template class VectorBase; -template class Vector; -template class SubVector; -template class MatrixBase; -template class SubMatrix; -template class Matrix; +template +class VectorBase; +template +class Vector; +template +class SubVector; +template +class MatrixBase; +template +class SubMatrix; +template +class Matrix; /// This class provides a way for switching between double and float types. -template class OtherReal { }; // useful in reading+writing routines - // to switch double and float. +template +class OtherReal {}; // useful in reading+writing routines + // to switch double and float. /// A specialized class for switching from float to double. -template<> class OtherReal { - public: - typedef double Real; +template <> +class OtherReal { + public: + typedef double Real; }; /// A specialized class for switching from double to float. -template<> class OtherReal { - public: - typedef float Real; +template <> +class OtherReal { + public: + typedef float Real; }; @@ -81,12 +87,10 @@ typedef int32 SignedMatrixIndexT; typedef uint32 UnsignedMatrixIndexT; // If you want to use size_t for the index type, do as follows instead: -//typedef size_t MatrixIndexT; -//typedef ssize_t SignedMatrixIndexT; -//typedef size_t UnsignedMatrixIndexT; - +// typedef size_t MatrixIndexT; +// typedef ssize_t SignedMatrixIndexT; +// typedef size_t UnsignedMatrixIndexT; } - #endif // KALDI_MATRIX_MATRIX_COMMON_H_ diff --git a/runtime/engine/kaldi/CMakeLists.txt b/runtime/engine/kaldi/CMakeLists.txt index f9b42e065..e55cecbb2 100644 --- a/runtime/engine/kaldi/CMakeLists.txt +++ b/runtime/engine/kaldi/CMakeLists.txt @@ -1,14 +1,15 @@ -project(kaldi) include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ) add_subdirectory(base) add_subdirectory(util) -add_subdirectory(lat) -add_subdirectory(fstext) -add_subdirectory(decoder) -add_subdirectory(lm) +if(WITH_ASR) + add_subdirectory(lat) + add_subdirectory(fstext) + add_subdirectory(decoder) + add_subdirectory(lm) -add_subdirectory(fstbin) -add_subdirectory(lmbin) + add_subdirectory(fstbin) + add_subdirectory(lmbin) +endif() diff --git a/runtime/engine/kaldi/base/kaldi-types.h b/runtime/engine/kaldi/base/kaldi-types.h index c6a3e1aed..f371e3dae 100644 --- a/runtime/engine/kaldi/base/kaldi-types.h +++ b/runtime/engine/kaldi/base/kaldi-types.h @@ -44,7 +44,19 @@ typedef float BaseFloat; #ifndef COMPILE_WITHOUT_OPENFST +#ifdef WITH_ASR #include +#else +using int8 = int8_t; +using int16 = int16_t; +using int32 = int32_t; +using int64 = int64_t; + +using uint8 = uint8_t; +using uint16 = uint16_t; +using uint32 = uint32_t; +using uint64 = uint64_t; +#endif namespace kaldi { using ::int16; diff --git a/runtime/engine/vad/CMakeLists.txt b/runtime/engine/vad/CMakeLists.txt new file mode 100644 index 000000000..d13cc407c --- /dev/null +++ b/runtime/engine/vad/CMakeLists.txt @@ -0,0 +1,18 @@ +# set(CMAKE_CXX_STANDARD 11) + +# # 指定下载解压后的fastdeploy库路径 +# set(FASTDEPLOY_INSTALL_DIR "fdlib/fastdeploy-linux-x64-1.0.4" CACHE STRING force) + +# if(NOT EXISTS ${FASTDEPLOY_INSTALL_DIR}) +# message(FATAL_ERROR "Please using cmake -B build -DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR}") +# endif() + +# include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# # 添加FastDeploy依赖头文件 +# include_directories(${FASTDEPLOY_INCS}) + +add_executable(infer_onnx_silero_vad ${CMAKE_CURRENT_SOURCE_DIR}/infer_onnx_silero_vad.cc wav.h vad.cc vad.h) + +# 添加FastDeploy库依赖 +target_link_libraries(infer_onnx_silero_vad ${FASTDEPLOY_LIBS}) diff --git a/runtime/engine/vad/README.md b/runtime/engine/vad/README.md new file mode 100644 index 000000000..f032be862 --- /dev/null +++ b/runtime/engine/vad/README.md @@ -0,0 +1,121 @@ +English | [简体中文](README_CN.md) + +# Silero VAD Deployment Example + +This directory provides examples that `infer_onnx_silero_vad` fast finishes the deployment of VAD models on CPU/GPU. + +Before deployment, two steps require confirmation. + +- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md). +- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md). + +Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. + +```bash +mkdir build +cd build +# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# Download the VAD model file and test audio. After decompression, place the model and test audio in the infer_onnx_silero_vad.cc peer directory +wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav + +# inference +./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav +``` + +- The above command works for Linux or MacOS. Refer to: + - [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md) for SDK use-pattern in Windows + +## VAD C++ Interface + +### Vad Class + +```c++ +Vad::Vad(const std::string& model_file, + const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption()) +``` + +**Parameter** + +> * **model_file**(str): Model file path +> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default. (use the default configuration) + +### setAudioCofig function + +**Must be called before the `init` function** + +```c++ +void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms); +``` + +**Parameter** + +> * **sr**(int): sampling rate +> * **frame_ms**(int): The length of each detection frame, and it is used to calculate the detection window size +> * **threshold**(float): Result probability judgment threshold +> * **min_silence_duration_ms**(int): The threshold used to calculate whether it is silence +> * **speech_pad_ms**(int): Used to calculate the end time of the speech + +### init function + +Used to initialize audio-related parameters. + +```c++ +void Vad::init(); +``` + +### loadAudio function + +Load audio. + +```c++ +void Vad::loadAudio(const std::string& wavPath) +``` + +**Parameter** + +> * **wavPath**(str): Audio file path + +### Predict function + +Used to start model reasoning. + +```c++ +bool Vad::Predict(); +``` + +### getResult function + +**Used to obtain reasoning results** + +```c++ +std::vector> Vad::getResult( + float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0, + float mergeThreshold = 0.3); +``` + +**Parameter** + +> * **removeThreshold**(float): Discard result fragment threshold; If some recognition results are too short, they will be discarded according to this threshold +> * **expandHeadThreshold**(float): Offset at the beginning of the segment; The recognized start time may be too close to the voice part, so move forward the start time accordingly +> * **expandTailThreshold**(float): Offset at the end of the segment; The recognized end time may be too close to the voice part, so the end time is moved back accordingly +> * **mergeThreshold**(float): Some result segments are very close and can be combined into one, and the vocal segments can be combined accordingly + +**The output result format is**`std::vector>` + +> Output a list, each element is a speech fragment +> +> Each clip can use 'start' to get the start time and 'end' to get the end time + +### Tips + +1. `The setAudioCofig`function must be called before the `init` function +2. The sampling rate of the input audio file must be consistent with that set in the code + +- [Model Description](../) +- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md) diff --git a/runtime/engine/vad/README_CN.md b/runtime/engine/vad/README_CN.md new file mode 100644 index 000000000..c45d9896c --- /dev/null +++ b/runtime/engine/vad/README_CN.md @@ -0,0 +1,119 @@ +[English](README.md) | 简体中文 +# Silero VAD 部署示例 + +本目录下提供`infer_onnx_silero_vad`快速完成 Silero VAD 模型在CPU/GPU。 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +以Linux上 VAD 推理为例,在本目录执行如下命令即可完成编译测试。 + +```bash +mkdir build +cd build +# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# 下载 VAD 模型文件和测试音频,解压后将模型和测试音频放置在与 infer_onnx_silero_vad.cc 同级目录下 +wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav + +# 推理 +./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav +``` + +以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: +- [如何在Windows中使用FastDeploy C++ SDK](../../../../docs/cn/faq/use_sdk_on_windows.md) + +## VAD C++ 接口 +### Vad 类 + +```c++ +Vad::Vad(const std::string& model_file, + const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption()) +``` + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 + +### setAudioCofig 函数 + +**必须在`init`函数前调用** + +```c++ +void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms); +``` + +**参数** + +> * **sr**(int): 采样率 +> * **frame_ms**(int): 每次检测帧长,用于计算检测窗口大小 +> * **threshold**(float): 结果概率判断阈值 +> * **min_silence_duration_ms**(int): 用于计算判断是否是 silence 的阈值 +> * **speech_pad_ms**(int): 用于计算 speach 结束时刻 + +### init 函数 + +用于初始化音频相关参数 + +```c++ +void Vad::init(); +``` + +### loadAudio 函数 + +加载音频 + +```c++ +void Vad::loadAudio(const std::string& wavPath) +``` + +**参数** + +> * **wavPath**(str): 音频文件路径 + +### Predict 函数 + +用于开始模型推理 + +```c++ +bool Vad::Predict(); +``` + +### getResult 函数 + +**用于获取推理结果** + +```c++ +std::vector> Vad::getResult( + float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0, + float mergeThreshold = 0.3); +``` + +**参数** + +> * **removeThreshold**(float): 丢弃结果片段阈值;部分识别结果太短则根据此阈值丢弃 +> * **expandHeadThreshold**(float): 结果片段开始时刻偏移;识别到的开始时刻可能过于贴近发声部分,因此据此前移开始时刻 +> * **expandTailThreshold**(float): 结果片段结束时刻偏移;识别到的结束时刻可能过于贴近发声部分,因此据此后移结束时刻 +> * **mergeThreshold**(float): 有的结果片段十分靠近,可以合并成一个,据此合并发声片段 + +**输出结果格式为**`std::vector>` + +> 输出一个列表,每个元素是一个讲话片段 +> +> 每个片段可以用 'start' 获取到开始时刻,用 'end' 获取到结束时刻 + +### 提示 + +1. `setAudioCofig`函数必须在`init`函数前调用 +2. 输入的音频文件的采样率必须与代码中设置的保持一致 + +- [模型介绍](../) +- [如何切换模型推理后端引擎](../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/runtime/engine/vad/infer_onnx_silero_vad.cc b/runtime/engine/vad/infer_onnx_silero_vad.cc new file mode 100644 index 000000000..7fb524060 --- /dev/null +++ b/runtime/engine/vad/infer_onnx_silero_vad.cc @@ -0,0 +1,65 @@ + +#include "vad.h" + +int main(int argc, char* argv[]) { + if (argc < 3) { + std::cout << "Usage: infer_onnx_silero_vad path/to/model path/to/audio " + "run_option, " + "e.g ./infer_onnx_silero_vad silero_vad.onnx sample.wav" + << std::endl; + return -1; + } + + std::string model_file = argv[1]; + std::string audio_file = argv[2]; + + int sr = 16000; + Vad vad(model_file); + // custom config, but must be set before init + vad.SetConfig(sr, 32, 0.45f, 200, 0, 0); + vad.Init(); + + std::vector inputWav; // [0, 1] + wav::WavReader wav_reader = wav::WavReader(audio_file); + assert(wav_reader.sample_rate() == sr); + + + auto num_samples = wav_reader.num_samples(); + inputWav.resize(num_samples); + for (int i = 0; i < num_samples; i++) { + inputWav[i] = wav_reader.data()[i] / 32768; + } + + int window_size_samples = vad.WindowSizeSamples(); + for (int64_t j = 0; j < num_samples; j += window_size_samples) { + auto start = j; + auto end = start + window_size_samples >= num_samples + ? num_samples + : start + window_size_samples; + auto current_chunk_size = end - start; + + std::vector r{&inputWav[0] + start, &inputWav[0] + end}; + assert(r.size() == current_chunk_size); + + if (!vad.ForwardChunk(r)) { + std::cerr << "Failed to inference while using model:" + << vad.ModelName() << "." << std::endl; + return false; + } + + Vad::State s = vad.Postprocess(); + std::cout << s << " "; + } + std::cout << std::endl; + + std::vector> result = vad.GetResult(); + for (auto& res : result) { + std::cout << "speak start: " << res["start"] + << " s, end: " << res["end"] << " s | "; + } + std::cout << "\b\b " << std::endl; + + vad.Reset(); + + return 0; +} diff --git a/runtime/engine/vad/vad.cc b/runtime/engine/vad/vad.cc new file mode 100644 index 000000000..7630b98df --- /dev/null +++ b/runtime/engine/vad/vad.cc @@ -0,0 +1,306 @@ +// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "vad.h" +#include +#include + + +#ifdef NDEBUG +#define LOG_DEBUG \ + ::fastdeploy::FDLogger(true, "[DEBUG]") << __REL_FILE__ << "(" << __LINE__ \ + << ")::" << __FUNCTION__ << "\t" +#else +#define LOG_DEBUG \ + ::fastdeploy::FDLogger(false, "[DEBUG]") \ + << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t" +#endif + +Vad::Vad(const std::string& model_file, + const fastdeploy::RuntimeOption& + custom_option /* = fastdeploy::RuntimeOption() */) { + valid_cpu_backends = {fastdeploy::Backend::ORT, + fastdeploy::Backend::OPENVINO}; + valid_gpu_backends = {fastdeploy::Backend::ORT, fastdeploy::Backend::TRT}; + + runtime_option = custom_option; + // ORT backend + runtime_option.UseCpu(); + runtime_option.UseOrtBackend(); + runtime_option.model_format = fastdeploy::ModelFormat::ONNX; + // grap opt level + runtime_option.ort_option.graph_optimization_level = 99; + // one-thread + runtime_option.ort_option.intra_op_num_threads = 1; + runtime_option.ort_option.inter_op_num_threads = 1; + // model path + runtime_option.model_file = model_file; +} + +void Vad::Init() { + std::call_once(init_, [&]() { initialized = Initialize(); }); +} + +std::string Vad::ModelName() const { return "VAD"; } + +void Vad::SetConfig(int sr, + int frame_ms, + float threshold, + int min_silence_duration_ms, + int speech_pad_left_ms, + int speech_pad_right_ms) { + if (initialized) { + fastdeploy::FDERROR << "SetConfig must be called before init" + << std::endl; + throw std::runtime_error("SetConfig must be called before init"); + } + sample_rate_ = sr; + sr_per_ms_ = sr / 1000; + threshold_ = threshold; + frame_ms_ = frame_ms; + min_silence_samples_ = min_silence_duration_ms * sr_per_ms_; + speech_pad_left_samples_ = speech_pad_left_ms * sr_per_ms_; + speech_pad_right_samples_ = speech_pad_right_ms * sr_per_ms_; + + // init chunk size + window_size_samples_ = frame_ms * sr_per_ms_; + current_chunk_size_ = window_size_samples_; + + fastdeploy::FDINFO << "sr=" << sr << " threshold=" << threshold + << " frame_ms=" << frame_ms + << " min_silence_duration_ms=" << min_silence_duration_ms + << " speech_pad_left_ms=" << speech_pad_left_ms + << " speech_pad_right_ms=" << speech_pad_right_ms; +} + +void Vad::Reset() { + std::memset(h_.data(), 0.0f, h_.size() * sizeof(float)); + std::memset(c_.data(), 0.0f, c_.size() * sizeof(float)); + + triggerd_ = false; + temp_end_ = 0; + current_sample_ = 0; + + speakStart_.clear(); + speakEnd_.clear(); + + states_.clear(); +} + +bool Vad::Initialize() { + // input & output holder + inputTensors_.resize(4); + outputTensors_.resize(3); + + // input shape + input_node_dims_.emplace_back(1); + input_node_dims_.emplace_back(window_size_samples_); + // sr buffer + sr_.resize(1); + sr_[0] = sample_rate_; + // hidden state buffer + h_.resize(size_hc_); + c_.resize(size_hc_); + + Reset(); + + // InitRuntime + if (!InitRuntime()) { + fastdeploy::FDERROR << "Failed to initialize fastdeploy backend." + << std::endl; + return false; + } + fastdeploy::FDINFO << "init done."; + return true; +} + +bool Vad::ForwardChunk(std::vector& chunk) { + // last chunk may not be window_size_samples_ + input_node_dims_.back() = chunk.size(); + assert(window_size_samples_ >= chunk.size()); + current_chunk_size_ = chunk.size(); + + inputTensors_[0].name = "input"; + inputTensors_[0].SetExternalData( + input_node_dims_, fastdeploy::FDDataType::FP32, chunk.data()); + inputTensors_[1].name = "sr"; + inputTensors_[1].SetExternalData( + sr_node_dims_, fastdeploy::FDDataType::INT64, sr_.data()); + inputTensors_[2].name = "h"; + inputTensors_[2].SetExternalData( + hc_node_dims_, fastdeploy::FDDataType::FP32, h_.data()); + inputTensors_[3].name = "c"; + inputTensors_[3].SetExternalData( + hc_node_dims_, fastdeploy::FDDataType::FP32, c_.data()); + + if (!Infer(inputTensors_, &outputTensors_)) { + return false; + } + + // Push forward sample index + current_sample_ += current_chunk_size_; + return true; +} + +const Vad::State& Vad::Postprocess() { + // update prob, h, c + outputProb_ = *(float*)outputTensors_[0].Data(); + auto* hn = static_cast(outputTensors_[1].MutableData()); + std::memcpy(h_.data(), hn, h_.size() * sizeof(float)); + auto* cn = static_cast(outputTensors_[2].MutableData()); + std::memcpy(c_.data(), cn, c_.size() * sizeof(float)); + + if (outputProb_ < threshold_ && !triggerd_) { + // 1. Silence + LOG_DEBUG << "{ silence: " << 1.0 * current_sample_ / sample_rate_ + << " s; prob: " << outputProb_ << " }"; + states_.emplace_back(Vad::State::SIL); + } else if (outputProb_ >= threshold_ && !triggerd_) { + // 2. Start + triggerd_ = true; + speech_start_ = + current_sample_ - current_chunk_size_ - speech_pad_left_samples_; + float start_sec = 1.0 * speech_start_ / sample_rate_; + speakStart_.emplace_back(start_sec); + LOG_DEBUG << "{ speech start: " << start_sec + << " s; prob: " << outputProb_ << " }"; + states_.emplace_back(Vad::State::START); + } else if (outputProb_ >= threshold_ - 0.15 && triggerd_) { + // 3. Continue + + if (temp_end_ != 0) { + // speech prob relaxation, speech continues again + LOG_DEBUG << "{ speech fake end(sil < min_silence_ms) to continue: " + << 1.0 * current_sample_ / sample_rate_ + << " s; prob: " << outputProb_ << " }"; + temp_end_ = 0; + } else { + // speech prob relaxation, keep tracking speech + LOG_DEBUG << "{ speech continue: " + << 1.0 * current_sample_ / sample_rate_ + << " s; prob: " << outputProb_ << " }"; + } + + states_.emplace_back(Vad::State::SPEECH); + } else if (outputProb_ < threshold_ - 0.15 && triggerd_) { + // 4. End + if (temp_end_ == 0) { + temp_end_ = current_sample_; + } + + // check possible speech end + if (current_sample_ - temp_end_ < min_silence_samples_) { + // a. silence < min_slience_samples, continue speaking + LOG_DEBUG << "{ speech fake end(sil < min_silence_ms): " + << 1.0 * current_sample_ / sample_rate_ + << " s; prob: " << outputProb_ << " }"; + states_.emplace_back(Vad::State::SIL); + } else { + // b. silence >= min_slience_samples, end speaking + speech_end_ = current_sample_ + speech_pad_right_samples_; + temp_end_ = 0; + triggerd_ = false; + auto end_sec = 1.0 * speech_end_ / sample_rate_; + speakEnd_.emplace_back(end_sec); + LOG_DEBUG << "{ speech end: " << end_sec + << " s; prob: " << outputProb_ << " }"; + states_.emplace_back(Vad::State::END); + } + } + + return states_.back(); +} + +const std::vector> Vad::GetResult( + float removeThreshold, + float expandHeadThreshold, + float expandTailThreshold, + float mergeThreshold) const { + float audioLength = 1.0 * current_sample_ / sample_rate_; + if (speakStart_.empty() && speakEnd_.empty()) { + return {}; + } + if (speakEnd_.size() != speakStart_.size()) { + // set the audio length as the last end + speakEnd_.emplace_back(audioLength); + } + // Remove too short segments + // auto startIter = speakStart_.begin(); + // auto endIter = speakEnd_.begin(); + // while (startIter != speakStart_.end()) { + // if (removeThreshold < audioLength && + // *endIter - *startIter < removeThreshold) { + // startIter = speakStart_.erase(startIter); + // endIter = speakEnd_.erase(endIter); + // } else { + // startIter++; + // endIter++; + // } + // } + // // Expand to avoid to tight cut. + // startIter = speakStart_.begin(); + // endIter = speakEnd_.begin(); + // *startIter = std::fmax(0.f, *startIter - expandHeadThreshold); + // *endIter = std::fmin(*endIter + expandTailThreshold, *(startIter + 1)); + // endIter = speakEnd_.end() - 1; + // startIter = speakStart_.end() - 1; + // *startIter = fmax(*startIter - expandHeadThreshold, *(endIter - 1)); + // *endIter = std::fmin(*endIter + expandTailThreshold, audioLength); + // for (int i = 1; i < speakStart_.size() - 1; ++i) { + // speakStart_[i] = std::fmax(speakStart_[i] - expandHeadThreshold, + // speakEnd_[i - 1]); + // speakEnd_[i] = std::fmin(speakEnd_[i] + expandTailThreshold, + // speakStart_[i + 1]); + // } + // // Merge very closed segments + // startIter = speakStart_.begin() + 1; + // endIter = speakEnd_.begin(); + // while (startIter != speakStart_.end()) { + // if (*startIter - *endIter < mergeThreshold) { + // startIter = speakStart_.erase(startIter); + // endIter = speakEnd_.erase(endIter); + // } else { + // startIter++; + // endIter++; + // } + // } + + std::vector> result; + for (int i = 0; i < speakStart_.size(); ++i) { + result.emplace_back(std::map( + {{"start", speakStart_[i]}, {"end", speakEnd_[i]}})); + } + return result; +} + +std::ostream& operator<<(std::ostream& os, const Vad::State& s) { + switch (s) { + case Vad::State::SIL: + os << "[SIL]"; + break; + case Vad::State::START: + os << "[STA]"; + break; + case Vad::State::SPEECH: + os << "[SPE]"; + break; + case Vad::State::END: + os << "[END]"; + break; + default: + // illegal state + os << "[ILL]"; + break; + } + return os; +} \ No newline at end of file diff --git a/runtime/engine/vad/vad.h b/runtime/engine/vad/vad.h new file mode 100644 index 000000000..6eed7d1c3 --- /dev/null +++ b/runtime/engine/vad/vad.h @@ -0,0 +1,124 @@ +// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include +#include +#include +#include "./wav.h" +#include "fastdeploy/fastdeploy_model.h" +#include "fastdeploy/runtime.h" + +class Vad : public fastdeploy::FastDeployModel { + public: + enum class State { SIL = 0, START, SPEECH, END }; + friend std::ostream& operator<<(std::ostream& os, const Vad::State& s); + + Vad(const std::string& model_file, + const fastdeploy::RuntimeOption& custom_option = + fastdeploy::RuntimeOption()); + + void Init(); + + void Reset(); + + void SetConfig(int sr, + int frame_ms, + float threshold, + int min_silence_duration_ms, + int speech_pad_left_ms, + int speech_pad_right_ms); + + bool ForwardChunk(std::vector& chunk); + + const State& Postprocess(); + + const std::vector> GetResult( + float removeThreshold = 0.0, + float expandHeadThreshold = 0.0, + float expandTailThreshold = 0, + float mergeThreshold = 0.0) const; + + const std::vector GetStates() const { return states_; } + + int SampleRate() const { return sample_rate_; } + + int FrameMs() const { return frame_ms_; } + int64_t WindowSizeSamples() const { return window_size_samples_; } + + float Threshold() const { return threshold_; } + + int MinSilenceDurationMs() const { + return min_silence_samples_ / sample_rate_; + } + int SpeechPadLeftMs() const { + return speech_pad_left_samples_ / sample_rate_; + } + int SpeechPadRightMs() const { + return speech_pad_right_samples_ / sample_rate_; + } + + int MinSilenceSamples() const { return min_silence_samples_; } + int SpeechPadLeftSamples() const { return speech_pad_left_samples_; } + int SpeechPadRightSamples() const { return speech_pad_right_samples_; } + + std::string ModelName() const override; + + private: + bool Initialize(); + + private: + std::once_flag init_; + // input and output + std::vector inputTensors_; + std::vector outputTensors_; + + // model states + bool triggerd_ = false; + unsigned int speech_start_ = 0; + unsigned int speech_end_ = 0; + unsigned int temp_end_ = 0; + unsigned int current_sample_ = 0; + unsigned int current_chunk_size_ = 0; + // MAX 4294967295 samples / 8sample per ms / 1000 / 60 = 8947 minutes + float outputProb_; + + std::vector speakStart_; + mutable std::vector speakEnd_; + + std::vector states_; + + /* ======================================================================== + */ + int sample_rate_ = 16000; + int frame_ms_ = 32; // 32, 64, 96 for 16k + float threshold_ = 0.5f; + + int64_t window_size_samples_; // support 256 512 768 for 8k; 512 1024 1536 + // for 16k. + int sr_per_ms_; // support 8 or 16 + int min_silence_samples_; // sr_per_ms_ * frame_ms_ + int speech_pad_left_samples_{0}; // usually 250ms + int speech_pad_right_samples_{0}; // usually 0 + + /* ======================================================================== + */ + std::vector sr_; + const size_t size_hc_ = 2 * 1 * 64; // It's FIXED. + std::vector h_; + std::vector c_; + + std::vector input_node_dims_; + const std::vector sr_node_dims_ = {1}; + const std::vector hc_node_dims_ = {2, 1, 64}; +}; diff --git a/runtime/engine/vad/wav.h b/runtime/engine/vad/wav.h new file mode 100644 index 000000000..6d1a6f723 --- /dev/null +++ b/runtime/engine/vad/wav.h @@ -0,0 +1,197 @@ +// Copyright (c) 2016 Personal (Binbin Zhang) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include +#include +#include +#include +#include +#include + +namespace wav { + +struct WavHeader { + char riff[4]; // "riff" + unsigned int size; + char wav[4]; // "WAVE" + char fmt[4]; // "fmt " + unsigned int fmt_size; + uint16_t format; + uint16_t channels; + unsigned int sample_rate; + unsigned int bytes_per_second; + uint16_t block_size; + uint16_t bit; + char data[4]; // "data" + unsigned int data_size; +}; + +class WavReader { + public: + WavReader() : data_(nullptr) {} + explicit WavReader(const std::string& filename) { Open(filename); } + + bool Open(const std::string& filename) { + FILE* fp = fopen(filename.c_str(), "rb"); + if (NULL == fp) { + std::cout << "Error in read " << filename; + return false; + } + + WavHeader header; + fread(&header, 1, sizeof(header), fp); + if (header.fmt_size < 16) { + fprintf(stderr, + "WaveData: expect PCM format data " + "to have fmt chunk of at least size 16.\n"); + return false; + } else if (header.fmt_size > 16) { + int offset = 44 - 8 + header.fmt_size - 16; + fseek(fp, offset, SEEK_SET); + fread(header.data, 8, sizeof(char), fp); + } + // check "riff" "WAVE" "fmt " "data" + + // Skip any sub-chunks between "fmt" and "data". Usually there will + // be a single "fact" sub chunk, but on Windows there can also be a + // "list" sub chunk. + while (0 != strncmp(header.data, "data", 4)) { + // We will just ignore the data in these chunks. + fseek(fp, header.data_size, SEEK_CUR); + // read next sub chunk + fread(header.data, 8, sizeof(char), fp); + } + + num_channel_ = header.channels; + sample_rate_ = header.sample_rate; + bits_per_sample_ = header.bit; + int num_data = header.data_size / (bits_per_sample_ / 8); + data_ = new float[num_data]; // Create 1-dim array + num_samples_ = num_data / num_channel_; + + for (int i = 0; i < num_data; ++i) { + switch (bits_per_sample_) { + case 8: { + char sample; + fread(&sample, 1, sizeof(char), fp); + data_[i] = static_cast(sample); + break; + } + case 16: { + int16_t sample; + fread(&sample, 1, sizeof(int16_t), fp); + // std::cout << sample; + data_[i] = static_cast(sample); + // std::cout << data_[i]; + break; + } + case 32: { + int sample; + fread(&sample, 1, sizeof(int), fp); + data_[i] = static_cast(sample); + break; + } + default: + fprintf(stderr, "unsupported quantization bits"); + exit(1); + } + } + fclose(fp); + return true; + } + + int num_channel() const { return num_channel_; } + int sample_rate() const { return sample_rate_; } + int bits_per_sample() const { return bits_per_sample_; } + int num_samples() const { return num_samples_; } + const float* data() const { return data_; } + + private: + int num_channel_; + int sample_rate_; + int bits_per_sample_; + int num_samples_; // sample points per channel + float* data_; +}; + +class WavWriter { + public: + WavWriter(const float* data, + int num_samples, + int num_channel, + int sample_rate, + int bits_per_sample) + : data_(data), + num_samples_(num_samples), + num_channel_(num_channel), + sample_rate_(sample_rate), + bits_per_sample_(bits_per_sample) {} + + void Write(const std::string& filename) { + FILE* fp = fopen(filename.c_str(), "w"); + // init char 'riff' 'WAVE' 'fmt ' 'data' + WavHeader header; + char wav_header[44] = { + 0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57, 0x41, 0x56, + 0x45, 0x66, 0x6d, 0x74, 0x20, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00}; + memcpy(&header, wav_header, sizeof(header)); + header.channels = num_channel_; + header.bit = bits_per_sample_; + header.sample_rate = sample_rate_; + header.data_size = num_samples_ * num_channel_ * (bits_per_sample_ / 8); + header.size = sizeof(header) - 8 + header.data_size; + header.bytes_per_second = + sample_rate_ * num_channel_ * (bits_per_sample_ / 8); + header.block_size = num_channel_ * (bits_per_sample_ / 8); + + fwrite(&header, 1, sizeof(header), fp); + + for (int i = 0; i < num_samples_; ++i) { + for (int j = 0; j < num_channel_; ++j) { + switch (bits_per_sample_) { + case 8: { + char sample = + static_cast(data_[i * num_channel_ + j]); + fwrite(&sample, 1, sizeof(sample), fp); + break; + } + case 16: { + int16_t sample = + static_cast(data_[i * num_channel_ + j]); + fwrite(&sample, 1, sizeof(sample), fp); + break; + } + case 32: { + int sample = + static_cast(data_[i * num_channel_ + j]); + fwrite(&sample, 1, sizeof(sample), fp); + break; + } + } + } + } + fclose(fp); + } + + private: + const float* data_; + int num_samples_; // total float points in data_ + int num_channel_; + int sample_rate_; + int bits_per_sample_; +}; + +} // namespace wav