// matrix/kaldi-matrix.cc

// Copyright 2009-2011  Lukas Burget;  Ondrej Glembek;  Go Vivace Inc.;
//                      Microsoft Corporation;  Saarland University;
//                      Yanmin Qian;  Petr Schwarz;  Jan Silovsky;
//                      Haihua Xu
//           2017       Shiyin Kang
//           2019       Yiwen Shao

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "matrix/kaldi-matrix.h"

namespace kaldi {

/*
template<typename Real>
void MatrixBase<Real>::Invert(Real *log_det, Real *det_sign,
                              bool inverse_needed) {
  KALDI_ASSERT(num_rows_ == num_cols_);
  if (num_rows_ == 0) {
    if (det_sign) *det_sign = 1;
    if (log_det) *log_det = 0.0;
    return;
  }
#ifndef HAVE_ATLAS
  KaldiBlasInt *pivot = new KaldiBlasInt[num_rows_];
  KaldiBlasInt M = num_rows_;
  KaldiBlasInt N = num_cols_;
  KaldiBlasInt LDA = stride_;
  KaldiBlasInt result = -1;
  KaldiBlasInt l_work = std::max<KaldiBlasInt>(1, N);
  Real *p_work;
  void *temp;
  if ((p_work = static_cast<Real*>(
          KALDI_MEMALIGN(16, sizeof(Real)*l_work, &temp))) == NULL) {
    delete[] pivot;
    throw std::bad_alloc();
  }

  clapack_Xgetrf2(&M, &N, data_, &LDA, pivot, &result);
  const int pivot_offset = 1;
#else
  int *pivot = new int[num_rows_];
  int result;
  clapack_Xgetrf(num_rows_, num_cols_, data_, stride_, pivot, &result);
  const int pivot_offset = 0;
#endif
  KALDI_ASSERT(result >= 0 && "Call to CLAPACK sgetrf_ or ATLAS clapack_sgetrf "
               "called with wrong arguments");
  if (result > 0) {
    if (inverse_needed) {
      KALDI_ERR << "Cannot invert: matrix is singular";
    } else {
      if (log_det) *log_det = -std::numeric_limits<Real>::infinity();
      if (det_sign) *det_sign = 0;
      delete[] pivot;
#ifndef HAVE_ATLAS
      KALDI_MEMALIGN_FREE(p_work);
#endif
      return;
    }
  }
  if (det_sign != NULL) {
    int sign = 1;
    for (MatrixIndexT i = 0; i < num_rows_; i++)
      if (pivot[i] != static_cast<int>(i) + pivot_offset) sign *= -1;
    *det_sign = sign;
  }
  if (log_det != NULL || det_sign != NULL) {  // Compute log determinant.
    if (log_det != NULL) *log_det = 0.0;
    Real prod = 1.0;
    for (MatrixIndexT i = 0; i < num_rows_; i++) {
      prod *= (*this)(i, i);
      if (i == num_rows_ - 1 || std::fabs(prod) < 1.0e-10 ||
          std::fabs(prod) > 1.0e+10) {
        if (log_det != NULL) *log_det += kaldi::Log(std::fabs(prod));
        if (det_sign != NULL) *det_sign *= (prod > 0 ?
1.0 : -1.0); prod = 1.0; } } } #ifndef HAVE_ATLAS if (inverse_needed) clapack_Xgetri2(&M, data_, &LDA, pivot, p_work, &l_work, &result); delete[] pivot; KALDI_MEMALIGN_FREE(p_work); #else if (inverse_needed) clapack_Xgetri(num_rows_, data_, stride_, pivot, &result); delete [] pivot; #endif KALDI_ASSERT(result == 0 && "Call to CLAPACK sgetri_ or ATLAS clapack_sgetri " "called with wrong arguments"); } template<> template<> void MatrixBase::AddVecVec(const float alpha, const VectorBase &a, const VectorBase &rb) { KALDI_ASSERT(a.Dim() == num_rows_ && rb.Dim() == num_cols_); cblas_Xger(a.Dim(), rb.Dim(), alpha, a.Data(), 1, rb.Data(), 1, data_, stride_); } template template void MatrixBase::AddVecVec(const Real alpha, const VectorBase &a, const VectorBase &b) { KALDI_ASSERT(a.Dim() == num_rows_ && b.Dim() == num_cols_); if (num_rows_ * num_cols_ > 100) { // It's probably worth it to allocate // temporary vectors of the right type and use BLAS. Vector temp_a(a), temp_b(b); cblas_Xger(num_rows_, num_cols_, alpha, temp_a.Data(), 1, temp_b.Data(), 1, data_, stride_); } else { const OtherReal *a_data = a.Data(), *b_data = b.Data(); Real *row_data = data_; for (MatrixIndexT i = 0; i < num_rows_; i++, row_data += stride_) { BaseFloat alpha_ai = alpha * a_data[i]; for (MatrixIndexT j = 0; j < num_cols_; j++) row_data[j] += alpha_ai * b_data[j]; } } } // instantiate the template above. template void MatrixBase::AddVecVec(const float alpha, const VectorBase &a, const VectorBase &b); template void MatrixBase::AddVecVec(const double alpha, const VectorBase &a, const VectorBase &b); template<> template<> void MatrixBase::AddVecVec(const double alpha, const VectorBase &a, const VectorBase &rb) { KALDI_ASSERT(a.Dim() == num_rows_ && rb.Dim() == num_cols_); if (num_rows_ == 0) return; cblas_Xger(a.Dim(), rb.Dim(), alpha, a.Data(), 1, rb.Data(), 1, data_, stride_); } template void MatrixBase::AddMatMat(const Real alpha, const MatrixBase& A, MatrixTransposeType transA, const MatrixBase& B, MatrixTransposeType transB, const Real beta) { KALDI_ASSERT((transA == kNoTrans && transB == kNoTrans && A.num_cols_ == B.num_rows_ && A.num_rows_ == num_rows_ && B.num_cols_ == num_cols_) || (transA == kTrans && transB == kNoTrans && A.num_rows_ == B.num_rows_ && A.num_cols_ == num_rows_ && B.num_cols_ == num_cols_) || (transA == kNoTrans && transB == kTrans && A.num_cols_ == B.num_cols_ && A.num_rows_ == num_rows_ && B.num_rows_ == num_cols_) || (transA == kTrans && transB == kTrans && A.num_rows_ == B.num_cols_ && A.num_cols_ == num_rows_ && B.num_rows_ == num_cols_)); KALDI_ASSERT(&A != this && &B != this); if (num_rows_ == 0) return; cblas_Xgemm(alpha, transA, A.data_, A.num_rows_, A.num_cols_, A.stride_, transB, B.data_, B.stride_, beta, data_, num_rows_, num_cols_, stride_); } template void MatrixBase::SetMatMatDivMat(const MatrixBase& A, const MatrixBase& B, const MatrixBase& C) { KALDI_ASSERT(A.NumRows() == B.NumRows() && A.NumCols() == B.NumCols()); KALDI_ASSERT(A.NumRows() == C.NumRows() && A.NumCols() == C.NumCols()); for (int32 r = 0; r < A.NumRows(); r++) { // each frame... for (int32 c = 0; c < A.NumCols(); c++) { BaseFloat i = C(r, c), o = B(r, c), od = A(r, c), id; if (i != 0.0) { id = od * (o / i); /// o / i is either zero or "scale". } else { id = od; /// Just imagine the scale was 1.0. This is somehow true in /// expectation; anyway, this case should basically never happen so it doesn't /// really matter. 
} (*this)(r, c) = id; } } } */ //template //void MatrixBase::CopyLowerToUpper() { //KALDI_ASSERT(num_rows_ == num_cols_); //Real *data = data_; //MatrixIndexT num_rows = num_rows_, stride = stride_; //for (int32 i = 0; i < num_rows; i++) //for (int32 j = 0; j < i; j++) //data[j * stride + i ] = data[i * stride + j]; //} //template //void MatrixBase::CopyUpperToLower() { //KALDI_ASSERT(num_rows_ == num_cols_); //Real *data = data_; //MatrixIndexT num_rows = num_rows_, stride = stride_; //for (int32 i = 0; i < num_rows; i++) //for (int32 j = 0; j < i; j++) //data[i * stride + j] = data[j * stride + i]; //} /* template void MatrixBase::SymAddMat2(const Real alpha, const MatrixBase &A, MatrixTransposeType transA, Real beta) { KALDI_ASSERT(num_rows_ == num_cols_ && ((transA == kNoTrans && A.num_rows_ == num_rows_) || (transA == kTrans && A.num_cols_ == num_cols_))); KALDI_ASSERT(A.data_ != data_); if (num_rows_ == 0) return; /// When the matrix dimension(this->num_rows_) is not less than 56 /// and the transpose type transA == kTrans, the cblas_Xsyrk(...) /// function will produce NaN in the output. This is a bug in the /// ATLAS library. To overcome this, the AddMatMat function, which calls /// cblas_Xgemm(...) rather than cblas_Xsyrk(...), is used in this special /// sitation. /// Wei Shi: Note this bug is observerd for single precision matrix /// on a 64-bit machine #ifdef HAVE_ATLAS if (transA == kTrans && num_rows_ >= 56) { this->AddMatMat(alpha, A, kTrans, A, kNoTrans, beta); return; } #endif // HAVE_ATLAS MatrixIndexT A_other_dim = (transA == kNoTrans ? A.num_cols_ : A.num_rows_); // This function call is hard-coded to update the lower triangle. cblas_Xsyrk(transA, num_rows_, A_other_dim, alpha, A.Data(), A.Stride(), beta, this->data_, this->stride_); } template void MatrixBase::AddMatSmat(const Real alpha, const MatrixBase &A, MatrixTransposeType transA, const MatrixBase &B, MatrixTransposeType transB, const Real beta) { KALDI_ASSERT((transA == kNoTrans && transB == kNoTrans && A.num_cols_ == B.num_rows_ && A.num_rows_ == num_rows_ && B.num_cols_ == num_cols_) || (transA == kTrans && transB == kNoTrans && A.num_rows_ == B.num_rows_ && A.num_cols_ == num_rows_ && B.num_cols_ == num_cols_) || (transA == kNoTrans && transB == kTrans && A.num_cols_ == B.num_cols_ && A.num_rows_ == num_rows_ && B.num_rows_ == num_cols_) || (transA == kTrans && transB == kTrans && A.num_rows_ == B.num_cols_ && A.num_cols_ == num_rows_ && B.num_rows_ == num_cols_)); KALDI_ASSERT(&A != this && &B != this); // We iterate over the columns of B. MatrixIndexT Astride = A.stride_, Bstride = B.stride_, stride = this->stride_, Arows = A.num_rows_, Acols = A.num_cols_; Real *data = this->data_, *Adata = A.data_, *Bdata = B.data_; MatrixIndexT num_cols = this->num_cols_; if (transB == kNoTrans) { // Iterate over the columns of *this and of B. for (MatrixIndexT c = 0; c < num_cols; c++) { // for each column of *this, do // [this column] = [alpha * A * this column of B] + [beta * this column] Xgemv_sparsevec(transA, Arows, Acols, alpha, Adata, Astride, Bdata + c, Bstride, beta, data + c, stride); } } else { // Iterate over the columns of *this and the rows of B. 
for (MatrixIndexT c = 0; c < num_cols; c++) { // for each column of *this, do // [this column] = [alpha * A * this row of B] + [beta * this column] Xgemv_sparsevec(transA, Arows, Acols, alpha, Adata, Astride, Bdata + (c * Bstride), 1, beta, data + c, stride); } } } template void MatrixBase::AddSmatMat(const Real alpha, const MatrixBase &A, MatrixTransposeType transA, const MatrixBase &B, MatrixTransposeType transB, const Real beta) { KALDI_ASSERT((transA == kNoTrans && transB == kNoTrans && A.num_cols_ == B.num_rows_ && A.num_rows_ == num_rows_ && B.num_cols_ == num_cols_) || (transA == kTrans && transB == kNoTrans && A.num_rows_ == B.num_rows_ && A.num_cols_ == num_rows_ && B.num_cols_ == num_cols_) || (transA == kNoTrans && transB == kTrans && A.num_cols_ == B.num_cols_ && A.num_rows_ == num_rows_ && B.num_rows_ == num_cols_) || (transA == kTrans && transB == kTrans && A.num_rows_ == B.num_cols_ && A.num_cols_ == num_rows_ && B.num_rows_ == num_cols_)); KALDI_ASSERT(&A != this && &B != this); MatrixIndexT Astride = A.stride_, Bstride = B.stride_, stride = this->stride_, Brows = B.num_rows_, Bcols = B.num_cols_; MatrixTransposeType invTransB = (transB == kTrans ? kNoTrans : kTrans); Real *data = this->data_, *Adata = A.data_, *Bdata = B.data_; MatrixIndexT num_rows = this->num_rows_; if (transA == kNoTrans) { // Iterate over the rows of *this and of A. for (MatrixIndexT r = 0; r < num_rows; r++) { // for each row of *this, do // [this row] = [alpha * (this row of A) * B^T] + [beta * this row] Xgemv_sparsevec(invTransB, Brows, Bcols, alpha, Bdata, Bstride, Adata + (r * Astride), 1, beta, data + (r * stride), 1); } } else { // Iterate over the rows of *this and the columns of A. for (MatrixIndexT r = 0; r < num_rows; r++) { // for each row of *this, do // [this row] = [alpha * (this column of A) * B^T] + [beta * this row] Xgemv_sparsevec(invTransB, Brows, Bcols, alpha, Bdata, Bstride, Adata + r, Astride, beta, data + (r * stride), 1); } } } template void MatrixBase::AddSpSp(const Real alpha, const SpMatrix &A_in, const SpMatrix &B_in, const Real beta) { MatrixIndexT sz = num_rows_; KALDI_ASSERT(sz == num_cols_ && sz == A_in.NumRows() && sz == B_in.NumRows()); Matrix A(A_in), B(B_in); // CblasLower or CblasUpper would work below as symmetric matrix is copied // fully (to save work, we used the matrix constructor from SpMatrix). // CblasLeft means A is on the left: C <-- alpha A B + beta C if (sz == 0) return; cblas_Xsymm(alpha, sz, A.data_, A.stride_, B.data_, B.stride_, beta, data_, stride_); } template void MatrixBase::AddMat(const Real alpha, const MatrixBase& A, MatrixTransposeType transA) { if (&A == this) { if (transA == kNoTrans) { Scale(alpha + 1.0); } else { KALDI_ASSERT(num_rows_ == num_cols_ && "AddMat: adding to self (transposed): not symmetric."); Real *data = data_; if (alpha == 1.0) { // common case-- handle separately. for (MatrixIndexT row = 0; row < num_rows_; row++) { for (MatrixIndexT col = 0; col < row; col++) { Real *lower = data + (row * stride_) + col, *upper = data + (col * stride_) + row; Real sum = *lower + *upper; *lower = *upper = sum; } *(data + (row * stride_) + row) *= 2.0; // diagonal. } } else { for (MatrixIndexT row = 0; row < num_rows_; row++) { for (MatrixIndexT col = 0; col < row; col++) { Real *lower = data + (row * stride_) + col, *upper = data + (col * stride_) + row; Real lower_tmp = *lower; *lower += alpha * *upper; *upper += alpha * lower_tmp; } *(data + (row * stride_) + row) *= (1.0 + alpha); // diagonal. 
} } } } else { int aStride = (int) A.stride_, stride = stride_; Real *adata = A.data_, *data = data_; if (transA == kNoTrans) { KALDI_ASSERT(A.num_rows_ == num_rows_ && A.num_cols_ == num_cols_); if (num_rows_ == 0) return; for (MatrixIndexT row = 0; row < num_rows_; row++, adata += aStride, data += stride) { cblas_Xaxpy(num_cols_, alpha, adata, 1, data, 1); } } else { KALDI_ASSERT(A.num_cols_ == num_rows_ && A.num_rows_ == num_cols_); if (num_rows_ == 0) return; for (MatrixIndexT row = 0; row < num_rows_; row++, adata++, data += stride) cblas_Xaxpy(num_cols_, alpha, adata, aStride, data, 1); } } } template void MatrixBase::AddSmat(Real alpha, const SparseMatrix &A, MatrixTransposeType trans) { if (trans == kNoTrans) { KALDI_ASSERT(NumRows() == A.NumRows()); KALDI_ASSERT(NumCols() == A.NumCols()); MatrixIndexT a_num_rows = A.NumRows(); for (MatrixIndexT i = 0; i < a_num_rows; ++i) { const SparseVector &row = A.Row(i); MatrixIndexT num_elems = row.NumElements(); for (MatrixIndexT id = 0; id < num_elems; ++id) { (*this)(i, row.GetElement(id).first) += alpha * row.GetElement(id).second; } } } else { KALDI_ASSERT(NumRows() == A.NumCols()); KALDI_ASSERT(NumCols() == A.NumRows()); MatrixIndexT a_num_rows = A.NumRows(); for (MatrixIndexT i = 0; i < a_num_rows; ++i) { const SparseVector &row = A.Row(i); MatrixIndexT num_elems = row.NumElements(); for (MatrixIndexT id = 0; id < num_elems; ++id) { (*this)(row.GetElement(id).first, i) += alpha * row.GetElement(id).second; } } } } template void MatrixBase::AddSmatMat(Real alpha, const SparseMatrix &A, MatrixTransposeType transA, const MatrixBase &B, Real beta) { if (transA == kNoTrans) { KALDI_ASSERT(NumRows() == A.NumRows()); KALDI_ASSERT(NumCols() == B.NumCols()); KALDI_ASSERT(A.NumCols() == B.NumRows()); this->Scale(beta); MatrixIndexT a_num_rows = A.NumRows(), this_num_cols = this->NumCols(); for (MatrixIndexT i = 0; i < a_num_rows; ++i) { Real *this_row_i = this->RowData(i); const SparseVector &A_row_i = A.Row(i); MatrixIndexT num_elems = A_row_i.NumElements(); for (MatrixIndexT e = 0; e < num_elems; ++e) { const std::pair &p = A_row_i.GetElement(e); MatrixIndexT k = p.first; Real alpha_A_ik = alpha * p.second; const Real *b_row_k = B.RowData(k); cblas_Xaxpy(this_num_cols, alpha_A_ik, b_row_k, 1, this_row_i, 1); //for (MatrixIndexT j = 0; j < this_num_cols; ++j) // this_row_i[j] += alpha_A_ik * b_row_k[j]; } } } else { KALDI_ASSERT(NumRows() == A.NumCols()); KALDI_ASSERT(NumCols() == B.NumCols()); KALDI_ASSERT(A.NumRows() == B.NumRows()); this->Scale(beta); Matrix buf(NumRows(), NumCols(), kSetZero); MatrixIndexT a_num_rows = A.NumRows(), this_num_cols = this->NumCols(); for (int k = 0; k < a_num_rows; ++k) { const Real *b_row_k = B.RowData(k); const SparseVector &A_row_k = A.Row(k); MatrixIndexT num_elems = A_row_k.NumElements(); for (MatrixIndexT e = 0; e < num_elems; ++e) { const std::pair &p = A_row_k.GetElement(e); MatrixIndexT i = p.first; Real alpha_A_ki = alpha * p.second; Real *this_row_i = this->RowData(i); cblas_Xaxpy(this_num_cols, alpha_A_ki, b_row_k, 1, this_row_i, 1); //for (MatrixIndexT j = 0; j < this_num_cols; ++j) // this_row_i[j] += alpha_A_ki * b_row_k[j]; } } } } template void MatrixBase::AddMatSmat(Real alpha, const MatrixBase &A, const SparseMatrix &B, MatrixTransposeType transB, Real beta) { if (transB == kNoTrans) { KALDI_ASSERT(NumRows() == A.NumRows()); KALDI_ASSERT(NumCols() == B.NumCols()); KALDI_ASSERT(A.NumCols() == B.NumRows()); this->Scale(beta); MatrixIndexT b_num_rows = B.NumRows(), this_num_rows = 
this->NumRows(); // Iterate over the rows of sparse matrix B and columns of A. for (MatrixIndexT k = 0; k < b_num_rows; ++k) { const SparseVector &B_row_k = B.Row(k); MatrixIndexT num_elems = B_row_k.NumElements(); const Real *a_col_k = A.Data() + k; for (MatrixIndexT e = 0; e < num_elems; ++e) { const std::pair &p = B_row_k.GetElement(e); MatrixIndexT j = p.first; Real alpha_B_kj = alpha * p.second; Real *this_col_j = this->Data() + j; // Add to entire 'j'th column of *this at once using cblas_Xaxpy. // pass stride to write a colmun as matrices are stored in row major order. cblas_Xaxpy(this_num_rows, alpha_B_kj, a_col_k, A.stride_, this_col_j, this->stride_); //for (MatrixIndexT i = 0; i < this_num_rows; ++i) // this_col_j[i*this->stride_] += alpha_B_kj * a_col_k[i*A.stride_]; } } } else { KALDI_ASSERT(NumRows() == A.NumRows()); KALDI_ASSERT(NumCols() == B.NumRows()); KALDI_ASSERT(A.NumCols() == B.NumCols()); this->Scale(beta); MatrixIndexT b_num_rows = B.NumRows(), this_num_rows = this->NumRows(); // Iterate over the rows of sparse matrix B and columns of *this. for (MatrixIndexT j = 0; j < b_num_rows; ++j) { const SparseVector &B_row_j = B.Row(j); MatrixIndexT num_elems = B_row_j.NumElements(); Real *this_col_j = this->Data() + j; for (MatrixIndexT e = 0; e < num_elems; ++e) { const std::pair &p = B_row_j.GetElement(e); MatrixIndexT k = p.first; Real alpha_B_jk = alpha * p.second; const Real *a_col_k = A.Data() + k; // Add to entire 'j'th column of *this at once using cblas_Xaxpy. // pass stride to write a column as matrices are stored in row major order. cblas_Xaxpy(this_num_rows, alpha_B_jk, a_col_k, A.stride_, this_col_j, this->stride_); //for (MatrixIndexT i = 0; i < this_num_rows; ++i) // this_col_j[i*this->stride_] += alpha_B_jk * a_col_k[i*A.stride_]; } } } } template template void MatrixBase::AddSp(const Real alpha, const SpMatrix &S) { KALDI_ASSERT(S.NumRows() == NumRows() && S.NumRows() == NumCols()); Real *data = data_; const OtherReal *sdata = S.Data(); MatrixIndexT num_rows = NumRows(), stride = Stride(); for (MatrixIndexT i = 0; i < num_rows; i++) { for (MatrixIndexT j = 0; j < i; j++, sdata++) { data[i*stride + j] += alpha * *sdata; data[j*stride + i] += alpha * *sdata; } data[i*stride + i] += alpha * *sdata++; } } // instantiate the template above. 
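// Illustrative usage sketch (not from the original Kaldi source; it assumes the
// usual Matrix / SpMatrix API declared in kaldi-matrix.h): AddSp() above
// accumulates a symmetric packed matrix into a full square matrix, e.g.
//
//   Matrix<float> m(4, 4);      // allocated and zeroed
//   SpMatrix<float> s(4);
//   s.SetRandn();
//   m.AddSp(0.5f, s);           // m += 0.5 * s, with s expanded to full form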
template void MatrixBase::AddSp(const float alpha, const SpMatrix &S); template void MatrixBase::AddSp(const double alpha, const SpMatrix &S); template void MatrixBase::AddSp(const float alpha, const SpMatrix &S); template void MatrixBase::AddSp(const double alpha, const SpMatrix &S); template void MatrixBase::AddDiagVecMat( const Real alpha, const VectorBase &v, const MatrixBase &M, MatrixTransposeType transM, Real beta) { if (beta != 1.0) this->Scale(beta); if (transM == kNoTrans) { KALDI_ASSERT(SameDim(*this, M)); } else { KALDI_ASSERT(M.NumRows() == NumCols() && M.NumCols() == NumRows()); } KALDI_ASSERT(v.Dim() == this->NumRows()); MatrixIndexT M_row_stride = M.Stride(), M_col_stride = 1, stride = stride_, num_rows = num_rows_, num_cols = num_cols_; if (transM == kTrans) std::swap(M_row_stride, M_col_stride); Real *data = data_; const Real *Mdata = M.Data(), *vdata = v.Data(); if (num_rows_ == 0) return; for (MatrixIndexT i = 0; i < num_rows; i++, data += stride, Mdata += M_row_stride, vdata++) cblas_Xaxpy(num_cols, alpha * *vdata, Mdata, M_col_stride, data, 1); } template void MatrixBase::AddMatDiagVec( const Real alpha, const MatrixBase &M, MatrixTransposeType transM, VectorBase &v, Real beta) { if (beta != 1.0) this->Scale(beta); if (transM == kNoTrans) { KALDI_ASSERT(SameDim(*this, M)); } else { KALDI_ASSERT(M.NumRows() == NumCols() && M.NumCols() == NumRows()); } KALDI_ASSERT(v.Dim() == this->NumCols()); MatrixIndexT M_row_stride = M.Stride(), M_col_stride = 1, stride = stride_, num_rows = num_rows_, num_cols = num_cols_; if (transM == kTrans) std::swap(M_row_stride, M_col_stride); Real *data = data_; const Real *Mdata = M.Data(), *vdata = v.Data(); if (num_rows_ == 0) return; for (MatrixIndexT i = 0; i < num_rows; i++){ for(MatrixIndexT j = 0; j < num_cols; j ++ ){ data[i*stride + j] += alpha * vdata[j] * Mdata[i*M_row_stride + j*M_col_stride]; } } } template void MatrixBase::AddMatMatElements(const Real alpha, const MatrixBase& A, const MatrixBase& B, const Real beta) { KALDI_ASSERT(A.NumRows() == B.NumRows() && A.NumCols() == B.NumCols()); KALDI_ASSERT(A.NumRows() == NumRows() && A.NumCols() == NumCols()); Real *data = data_; const Real *dataA = A.Data(); const Real *dataB = B.Data(); for (MatrixIndexT i = 0; i < num_rows_; i++) { for (MatrixIndexT j = 0; j < num_cols_; j++) { data[j] = beta*data[j] + alpha*dataA[j]*dataB[j]; } data += Stride(); dataA += A.Stride(); dataB += B.Stride(); } } #if !defined(HAVE_ATLAS) && !defined(USE_KALDI_SVD) // **************************************************************************** // **************************************************************************** template void MatrixBase::LapackGesvd(VectorBase *s, MatrixBase *U_in, MatrixBase *V_in) { KALDI_ASSERT(s != NULL && U_in != this && V_in != this); Matrix tmpU, tmpV; if (U_in == NULL) tmpU.Resize(this->num_rows_, 1); // work-space if U_in empty. if (V_in == NULL) tmpV.Resize(1, this->num_cols_); // work-space if V_in empty. /// Impementation notes: /// Lapack works in column-order, therefore the dimensions of *this are /// swapped as well as the U and V matrices. KaldiBlasInt M = num_cols_; KaldiBlasInt N = num_rows_; KaldiBlasInt LDA = Stride(); KALDI_ASSERT(N>=M); // NumRows >= columns. if (U_in) { KALDI_ASSERT((int)U_in->num_rows_ == N && (int)U_in->num_cols_ == M); } if (V_in) { KALDI_ASSERT((int)V_in->num_rows_ == M && (int)V_in->num_cols_ == M); } KALDI_ASSERT((int)s->Dim() == std::min(M, N)); MatrixBase *U = (U_in ? U_in : &tmpU); MatrixBase *V = (V_in ? 
V_in : &tmpV);
  KaldiBlasInt V_stride = V->Stride();
  KaldiBlasInt U_stride = U->Stride();

  // Original LAPACK recipe
  // KaldiBlasInt l_work = std::max(std::max<long int>
  //   (1, 3*std::min(M, N)+std::max(M, N)), 5*std::min(M, N))*2;
  KaldiBlasInt l_work = -1;
  Real work_query;
  KaldiBlasInt result;

  // query for work space
  char *u_job = const_cast<char*>(U_in ? "s" : "N");  // "s" == skinny, "N" == "none."
  char *v_job = const_cast<char*>(V_in ? "s" : "N");  // "s" == skinny, "N" == "none."
  clapack_Xgesvd(v_job, u_job,
                 &M, &N, data_, &LDA,
                 s->Data(),
                 V->Data(), &V_stride,
                 U->Data(), &U_stride,
                 &work_query, &l_work,
                 &result);

  KALDI_ASSERT(result >= 0 && "Call to CLAPACK dgesvd_ called with wrong arguments");

  l_work = static_cast<KaldiBlasInt>(work_query);
  Real *p_work;
  void *temp;
  if ((p_work = static_cast<Real*>(
          KALDI_MEMALIGN(16, sizeof(Real)*l_work, &temp))) == NULL)
    throw std::bad_alloc();

  // perform svd
  clapack_Xgesvd(v_job, u_job,
                 &M, &N, data_, &LDA,
                 s->Data(),
                 V->Data(), &V_stride,
                 U->Data(), &U_stride,
                 p_work, &l_work,
                 &result);

  KALDI_ASSERT(result >= 0 && "Call to CLAPACK dgesvd_ called with wrong arguments");

  if (result != 0) {
    KALDI_WARN << "CLAPACK sgesvd_ : some weird convergence not satisfied";
  }
  KALDI_MEMALIGN_FREE(p_work);
}

#endif
*/

// Copy constructor.  Copies data to newly allocated memory.
template<typename Real>
Matrix<Real>::Matrix(const MatrixBase<Real> &M,
                     MatrixTransposeType trans/*=kNoTrans*/)
    : MatrixBase<Real>() {
  if (trans == kNoTrans) {
    Resize(M.num_rows_, M.num_cols_);
    this->CopyFromMat(M);
  } else {
    Resize(M.num_cols_, M.num_rows_);
    this->CopyFromMat(M, kTrans);
  }
}

// Copy constructor.  Copies data to newly allocated memory.
template<typename Real>
Matrix<Real>::Matrix(const Matrix<Real> &M):
    MatrixBase<Real>() {
  Resize(M.num_rows_, M.num_cols_);
  this->CopyFromMat(M);
}

/// Copy constructor from another type.
template<typename Real>
template<typename OtherReal>
Matrix<Real>::Matrix(const MatrixBase<OtherReal> &M,
                     MatrixTransposeType trans) : MatrixBase<Real>() {
  if (trans == kNoTrans) {
    Resize(M.NumRows(), M.NumCols());
    this->CopyFromMat(M);
  } else {
    Resize(M.NumCols(), M.NumRows());
    this->CopyFromMat(M, kTrans);
  }
}

// Instantiate this constructor for float->double and double->float.
template Matrix<float>::Matrix(const MatrixBase<double> &M,
                               MatrixTransposeType trans);
template Matrix<double>::Matrix(const MatrixBase<float> &M,
                                MatrixTransposeType trans);

template<typename Real>
inline void Matrix<Real>::Init(const MatrixIndexT rows,
                               const MatrixIndexT cols,
                               const MatrixStrideType stride_type) {
  if (rows * cols == 0) {
    KALDI_ASSERT(rows == 0 && cols == 0);
    this->num_rows_ = 0;
    this->num_cols_ = 0;
    this->stride_ = 0;
    this->data_ = NULL;
    return;
  }
  KALDI_ASSERT(rows > 0 && cols > 0);
  MatrixIndexT skip, stride;
  size_t size;
  void *data;  // aligned memory block
  void *temp;  // memory block to be really freed

  // compute the size of skip and real cols
  skip = ((16 / sizeof(Real)) - cols % (16 / sizeof(Real)))
      % (16 / sizeof(Real));
  stride = cols + skip;
  size = static_cast<size_t>(rows) * static_cast<size_t>(stride)
      * sizeof(Real);

  // allocate the memory and set the right dimensions and parameters
  if (NULL != (data = KALDI_MEMALIGN(16, size, &temp))) {
    MatrixBase<Real>::data_ = static_cast<Real *>(data);
    MatrixBase<Real>::num_rows_ = rows;
    MatrixBase<Real>::num_cols_ = cols;
    MatrixBase<Real>::stride_ = (stride_type == kDefaultStride ? stride : cols);
  } else {
    throw std::bad_alloc();
  }
}

template<typename Real>
void Matrix<Real>::Resize(const MatrixIndexT rows,
                          const MatrixIndexT cols,
                          MatrixResizeType resize_type,
                          MatrixStrideType stride_type) {
  // the next block uses recursion to handle what we have to do if
  // resize_type == kCopyData.
  if (resize_type == kCopyData) {
    if (this->data_ == NULL || rows == 0) resize_type = kSetZero;  // nothing to copy.
    else if (rows == this->num_rows_ && cols == this->num_cols_ &&
             (stride_type == kDefaultStride ||
              this->stride_ == this->num_cols_)) {
      return;
    }  // nothing to do.
    else {
      // set tmp to a matrix of the desired size; if new matrix
      // is bigger in some dimension, zero it.
      MatrixResizeType new_resize_type =
          (rows > this->num_rows_ || cols > this->num_cols_) ? kSetZero : kUndefined;
      Matrix<Real> tmp(rows, cols, new_resize_type, stride_type);
      MatrixIndexT rows_min = std::min(rows, this->num_rows_),
          cols_min = std::min(cols, this->num_cols_);
      tmp.Range(0, rows_min, 0, cols_min).
          CopyFromMat(this->Range(0, rows_min, 0, cols_min));
      tmp.Swap(this);
      // and now let tmp go out of scope, deleting what was in *this.
      return;
    }
  }
  // At this point, resize_type == kSetZero or kUndefined.

  if (MatrixBase<Real>::data_ != NULL) {
    if (rows == MatrixBase<Real>::num_rows_
        && cols == MatrixBase<Real>::num_cols_) {
      if (resize_type == kSetZero)
        this->SetZero();
      return;
    }
    else
      Destroy();
  }
  Init(rows, cols, stride_type);
  if (resize_type == kSetZero) MatrixBase<Real>::SetZero();
}

template<typename Real>
template<typename OtherReal>
void MatrixBase<Real>::CopyFromMat(const MatrixBase<OtherReal> &M,
                                   MatrixTransposeType Trans) {
  if (sizeof(Real) == sizeof(OtherReal) &&
      static_cast<const void*>(M.Data()) ==
      static_cast<const void*>(this->Data())) {
    // CopyFromMat called on same data.  Nothing to do (except sanity checks).
    KALDI_ASSERT(Trans == kNoTrans && M.NumRows() == NumRows() &&
                 M.NumCols() == NumCols() && M.Stride() == Stride());
    return;
  }
  if (Trans == kNoTrans) {
    KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == M.NumCols());
    for (MatrixIndexT i = 0; i < num_rows_; i++)
      (*this).Row(i).CopyFromVec(M.Row(i));
  } else {
    KALDI_ASSERT(num_cols_ == M.NumRows() && num_rows_ == M.NumCols());
    int32 this_stride = stride_, other_stride = M.Stride();
    Real *this_data = data_;
    const OtherReal *other_data = M.Data();
    for (MatrixIndexT i = 0; i < num_rows_; i++)
      for (MatrixIndexT j = 0; j < num_cols_; j++)
        this_data[i * this_stride + j] = other_data[j * other_stride + i];
  }
}

// template instantiations.
template
void MatrixBase<float>::CopyFromMat(const MatrixBase<float> & M,
                                    MatrixTransposeType Trans);
template
void MatrixBase<double>::CopyFromMat(const MatrixBase<float> & M,
                                     MatrixTransposeType Trans);
template
void MatrixBase<float>::CopyFromMat(const MatrixBase<double> & M,
                                    MatrixTransposeType Trans);
template
void MatrixBase<double>::CopyFromMat(const MatrixBase<double> & M,
                                     MatrixTransposeType Trans);

/*
// Specialize the template for CopyFromSp for float, float.
template<>
template<>
void MatrixBase<float>::CopyFromSp(const SpMatrix<float> & M) {
  KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
  MatrixIndexT num_rows = num_rows_, stride = stride_;
  const float *Mdata = M.Data();
  float *row_data = data_, *col_data = data_;
  for (MatrixIndexT i = 0; i < num_rows; i++) {
    cblas_scopy(i+1, Mdata, 1, row_data, 1);  // copy to the row.
    cblas_scopy(i, Mdata, 1, col_data, stride);  // copy to the column.
    Mdata += i+1;
    row_data += stride;
    col_data += 1;
  }
}

// Specialize the template for CopyFromSp for double, double.
template<>
template<>
void MatrixBase<double>::CopyFromSp(const SpMatrix<double> & M) {
  KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
  MatrixIndexT num_rows = num_rows_, stride = stride_;
  const double *Mdata = M.Data();
  double *row_data = data_, *col_data = data_;
  for (MatrixIndexT i = 0; i < num_rows; i++) {
    cblas_dcopy(i+1, Mdata, 1, row_data, 1);  // copy to the row.
    cblas_dcopy(i, Mdata, 1, col_data, stride);  // copy to the column.
    Mdata += i+1;
    row_data += stride;
    col_data += 1;
  }
}

template<typename Real>
template<typename OtherReal>
void MatrixBase<Real>::CopyFromSp(const SpMatrix<OtherReal> & M) {
  KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
  // MORE EFFICIENT IF LOWER TRIANGULAR!  Reverse code otherwise.
  for (MatrixIndexT i = 0; i < num_rows_; i++) {
    for (MatrixIndexT j = 0; j < i; j++) {
      (*this)(j, i) = (*this)(i, j) = M(i, j);
    }
    (*this)(i, i) = M(i, i);
  }
}

// Instantiate this function
template
void MatrixBase<float>::CopyFromSp(const SpMatrix<double> & M);
template
void MatrixBase<double>::CopyFromSp(const SpMatrix<float> & M);

template<typename Real>
template<typename OtherReal>
void MatrixBase<Real>::CopyFromTp(const TpMatrix<OtherReal> & M,
                                  MatrixTransposeType Trans) {
  if (Trans == kNoTrans) {
    KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
    SetZero();
    Real *out_i = data_;
    const OtherReal *in_i = M.Data();
    for (MatrixIndexT i = 0; i < num_rows_; i++, out_i += stride_, in_i += i) {
      for (MatrixIndexT j = 0; j <= i; j++)
        out_i[j] = in_i[j];
    }
  } else {
    SetZero();
    KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
    MatrixIndexT stride = stride_;
    Real *out_i = data_;
    const OtherReal *in_i = M.Data();
    for (MatrixIndexT i = 0; i < num_rows_; i++, out_i ++, in_i += i) {
      for (MatrixIndexT j = 0; j <= i; j++)
        out_i[j*stride] = in_i[j];
    }
  }
}

template
void MatrixBase<float>::CopyFromTp(const TpMatrix<float> & M,
                                   MatrixTransposeType trans);
template
void MatrixBase<float>::CopyFromTp(const TpMatrix<double> & M,
                                   MatrixTransposeType trans);
template
void MatrixBase<double>::CopyFromTp(const TpMatrix<float> & M,
                                    MatrixTransposeType trans);
template
void MatrixBase<double>::CopyFromTp(const TpMatrix<double> & M,
                                    MatrixTransposeType trans);
*/

template<typename Real>
void MatrixBase<Real>::CopyRowsFromVec(const VectorBase<Real> &rv) {
  if (rv.Dim() == num_rows_*num_cols_) {
    if (stride_ == num_cols_) {
      // one big copy operation.
      const Real *rv_data = rv.Data();
      std::memcpy(data_, rv_data, sizeof(Real)*num_rows_*num_cols_);
    } else {
      const Real *rv_data = rv.Data();
      for (MatrixIndexT r = 0; r < num_rows_; r++) {
        Real *row_data = RowData(r);
        for (MatrixIndexT c = 0; c < num_cols_; c++) {
          row_data[c] = rv_data[c];
        }
        rv_data += num_cols_;
      }
    }
  } else if (rv.Dim() == num_cols_) {
    const Real *rv_data = rv.Data();
    for (MatrixIndexT r = 0; r < num_rows_; r++)
      std::memcpy(RowData(r), rv_data, sizeof(Real)*num_cols_);
  } else {
    KALDI_ERR << "Wrong sized arguments";
  }
}

template<typename Real>
template<typename OtherReal>
void MatrixBase<Real>::CopyRowsFromVec(const VectorBase<OtherReal> &rv) {
  if (rv.Dim() == num_rows_*num_cols_) {
    const OtherReal *rv_data = rv.Data();
    for (MatrixIndexT r = 0; r < num_rows_; r++) {
      Real *row_data = RowData(r);
      for (MatrixIndexT c = 0; c < num_cols_; c++) {
        row_data[c] = static_cast<Real>(rv_data[c]);
      }
      rv_data += num_cols_;
    }
  } else if (rv.Dim() == num_cols_) {
    const OtherReal *rv_data = rv.Data();
    Real *first_row_data = RowData(0);
    for (MatrixIndexT c = 0; c < num_cols_; c++)
      first_row_data[c] = rv_data[c];
    for (MatrixIndexT r = 1; r < num_rows_; r++)
      std::memcpy(RowData(r), first_row_data, sizeof(Real)*num_cols_);
  } else {
    KALDI_ERR << "Wrong sized arguments.";
  }
}

template
void MatrixBase<float>::CopyRowsFromVec(const VectorBase<double> &rv);
template
void MatrixBase<double>::CopyRowsFromVec(const VectorBase<float> &rv);

template<typename Real>
void MatrixBase<Real>::CopyColsFromVec(const VectorBase<Real> &rv) {
  if (rv.Dim() == num_rows_*num_cols_) {
    const Real *v_inc_data = rv.Data();
    Real *m_inc_data = data_;

    for (MatrixIndexT c = 0; c < num_cols_; c++) {
      for (MatrixIndexT r = 0; r < num_rows_; r++) {
        m_inc_data[r * stride_] = v_inc_data[r];
      }
      v_inc_data += num_rows_;
      m_inc_data ++;
    }
  } else if (rv.Dim() == num_rows_) {
    const Real *v_inc_data = rv.Data();
    Real *m_inc_data = data_;
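    // In this branch rv has one entry per row: row r of the matrix is filled
    // with the single value rv(r), so every column ends up being a copy of rv.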
for (MatrixIndexT r = 0; r < num_rows_; r++) { Real value = *(v_inc_data++); for (MatrixIndexT c = 0; c < num_cols_; c++) m_inc_data[c] = value; m_inc_data += stride_; } } else { KALDI_ERR << "Wrong size of arguments."; } } template void MatrixBase::CopyRowFromVec(const VectorBase &rv, const MatrixIndexT row) { KALDI_ASSERT(rv.Dim() == num_cols_ && static_cast(row) < static_cast(num_rows_)); const Real *rv_data = rv.Data(); Real *row_data = RowData(row); std::memcpy(row_data, rv_data, num_cols_ * sizeof(Real)); } /* template void MatrixBase::CopyDiagFromVec(const VectorBase &rv) { KALDI_ASSERT(rv.Dim() == std::min(num_cols_, num_rows_)); const Real *rv_data = rv.Data(), *rv_end = rv_data + rv.Dim(); Real *my_data = this->Data(); for (; rv_data != rv_end; rv_data++, my_data += (this->stride_+1)) *my_data = *rv_data; }*/ template void MatrixBase::CopyColFromVec(const VectorBase &rv, const MatrixIndexT col) { KALDI_ASSERT(rv.Dim() == num_rows_ && static_cast(col) < static_cast(num_cols_)); const Real *rv_data = rv.Data(); Real *col_data = data_ + col; for (MatrixIndexT r = 0; r < num_rows_; r++) col_data[r * stride_] = rv_data[r]; } template void Matrix::RemoveRow(MatrixIndexT i) { KALDI_ASSERT(static_cast(i) < static_cast(MatrixBase::num_rows_) && "Access out of matrix"); for (MatrixIndexT j = i + 1; j < MatrixBase::num_rows_; j++) MatrixBase::Row(j-1).CopyFromVec( MatrixBase::Row(j)); MatrixBase::num_rows_--; } template void Matrix::Destroy() { // we need to free the data block if it was defined if (NULL != MatrixBase::data_) KALDI_MEMALIGN_FREE( MatrixBase::data_); MatrixBase::data_ = NULL; MatrixBase::num_rows_ = MatrixBase::num_cols_ = MatrixBase::stride_ = 0; } /* template void MatrixBase::MulElements(const MatrixBase &a) { KALDI_ASSERT(a.NumRows() == num_rows_ && a.NumCols() == num_cols_); if (num_cols_ == stride_ && num_cols_ == a.stride_) { mul_elements(num_rows_ * num_cols_, a.data_, data_); } else { MatrixIndexT a_stride = a.stride_, stride = stride_; Real *data = data_, *a_data = a.data_; for (MatrixIndexT i = 0; i < num_rows_; i++) { mul_elements(num_cols_, a_data, data); a_data += a_stride; data += stride; } } } template void MatrixBase::DivElements(const MatrixBase &a) { KALDI_ASSERT(a.NumRows() == num_rows_ && a.NumCols() == num_cols_); MatrixIndexT i; MatrixIndexT j; for (i = 0; i < num_rows_; i++) { for (j = 0; j < num_cols_; j++) { (*this)(i, j) /= a(i, j); } } } template Real MatrixBase::Sum() const { double sum = 0.0; for (MatrixIndexT i = 0; i < num_rows_; i++) { for (MatrixIndexT j = 0; j < num_cols_; j++) { sum += (*this)(i, j); } } return (Real)sum; } template void MatrixBase::Max(const MatrixBase &A) { KALDI_ASSERT(A.NumRows() == NumRows() && A.NumCols() == NumCols()); for (MatrixIndexT row = 0; row < num_rows_; row++) { Real *row_data = RowData(row); const Real *other_row_data = A.RowData(row); MatrixIndexT num_cols = num_cols_; for (MatrixIndexT col = 0; col < num_cols; col++) { row_data[col] = std::max(row_data[col], other_row_data[col]); } } } template void MatrixBase::Min(const MatrixBase &A) { KALDI_ASSERT(A.NumRows() == NumRows() && A.NumCols() == NumCols()); for (MatrixIndexT row = 0; row < num_rows_; row++) { Real *row_data = RowData(row); const Real *other_row_data = A.RowData(row); MatrixIndexT num_cols = num_cols_; for (MatrixIndexT col = 0; col < num_cols; col++) { row_data[col] = std::min(row_data[col], other_row_data[col]); } } } template void MatrixBase::Scale(Real alpha) { if (alpha == 1.0) return; if (num_rows_ == 0) return; if (num_cols_ == 
stride_) { cblas_Xscal(static_cast(num_rows_) * static_cast(num_cols_), alpha, data_,1); } else { Real *data = data_; for (MatrixIndexT i = 0; i < num_rows_; ++i, data += stride_) { cblas_Xscal(num_cols_, alpha, data,1); } } } template // scales each row by scale[i]. void MatrixBase::MulRowsVec(const VectorBase &scale) { KALDI_ASSERT(scale.Dim() == num_rows_); MatrixIndexT M = num_rows_, N = num_cols_; for (MatrixIndexT i = 0; i < M; i++) { Real this_scale = scale(i); for (MatrixIndexT j = 0; j < N; j++) { (*this)(i, j) *= this_scale; } } } template void MatrixBase::MulRowsGroupMat(const MatrixBase &src) { KALDI_ASSERT(src.NumRows() == this->NumRows() && this->NumCols() % src.NumCols() == 0); int32 group_size = this->NumCols() / src.NumCols(), num_groups = this->NumCols() / group_size, num_rows = this->NumRows(); for (MatrixIndexT i = 0; i < num_rows; i++) { Real *data = this->RowData(i); for (MatrixIndexT j = 0; j < num_groups; j++, data += group_size) { Real scale = src(i, j); cblas_Xscal(group_size, scale, data, 1); } } } template void MatrixBase::GroupPnormDeriv(const MatrixBase &input, const MatrixBase &output, Real power) { KALDI_ASSERT(input.NumCols() == this->NumCols() && input.NumRows() == this->NumRows()); KALDI_ASSERT(this->NumCols() % output.NumCols() == 0 && this->NumRows() == output.NumRows()); int group_size = this->NumCols() / output.NumCols(), num_rows = this->NumRows(), num_cols = this->NumCols(); if (power == 1.0) { for (MatrixIndexT i = 0; i < num_rows; i++) { for (MatrixIndexT j = 0; j < num_cols; j++) { Real input_val = input(i, j); (*this)(i, j) = (input_val == 0 ? 0 : (input_val > 0 ? 1 : -1)); } } } else if (power == std::numeric_limits::infinity()) { for (MatrixIndexT i = 0; i < num_rows; i++) { for (MatrixIndexT j = 0; j < num_cols; j++) { Real output_val = output(i, j / group_size), input_val = input(i, j); if (output_val == 0) (*this)(i, j) = 0; else (*this)(i, j) = (std::abs(input_val) == output_val ? 1.0 : 0.0) * (input_val >= 0 ? 1 : -1); } } } else { for (MatrixIndexT i = 0; i < num_rows; i++) { for (MatrixIndexT j = 0; j < num_cols; j++) { Real output_val = output(i, j / group_size), input_val = input(i, j); if (output_val == 0) (*this)(i, j) = 0; else (*this)(i, j) = pow(std::abs(input_val), power - 1) * pow(output_val, 1 - power) * (input_val >= 0 ? 1 : -1) ; } } } } template void MatrixBase::GroupMaxDeriv(const MatrixBase &input, const MatrixBase &output) { KALDI_ASSERT(input.NumCols() == this->NumCols() && input.NumRows() == this->NumRows()); KALDI_ASSERT(this->NumCols() % output.NumCols() == 0 && this->NumRows() == output.NumRows()); int group_size = this->NumCols() / output.NumCols(), num_rows = this->NumRows(), num_cols = this->NumCols(); for (MatrixIndexT i = 0; i < num_rows; i++) { for (MatrixIndexT j = 0; j < num_cols; j++) { Real input_val = input(i, j); Real output_val = output(i, j / group_size); (*this)(i, j) = (input_val == output_val ? 1 : 0); } } } template // scales each column by scale[i]. 
void MatrixBase::MulColsVec(const VectorBase &scale) { KALDI_ASSERT(scale.Dim() == num_cols_); for (MatrixIndexT i = 0; i < num_rows_; i++) { for (MatrixIndexT j = 0; j < num_cols_; j++) { Real this_scale = scale(j); (*this)(i, j) *= this_scale; } } } */ template void MatrixBase::SetZero() { if (num_cols_ == stride_) memset(data_, 0, sizeof(Real)*num_rows_*num_cols_); else for (MatrixIndexT row = 0; row < num_rows_; row++) memset(data_ + row*stride_, 0, sizeof(Real)*num_cols_); } template void MatrixBase::Set(Real value) { for (MatrixIndexT row = 0; row < num_rows_; row++) { for (MatrixIndexT col = 0; col < num_cols_; col++) { (*this)(row, col) = value; } } } /* template void MatrixBase::SetUnit() { SetZero(); for (MatrixIndexT row = 0; row < std::min(num_rows_, num_cols_); row++) (*this)(row, row) = 1.0; } template void MatrixBase::SetRandn() { kaldi::RandomState rstate; for (MatrixIndexT row = 0; row < num_rows_; row++) { Real *row_data = this->RowData(row); MatrixIndexT nc = (num_cols_ % 2 == 1) ? num_cols_ - 1 : num_cols_; for (MatrixIndexT col = 0; col < nc; col += 2) { kaldi::RandGauss2(row_data + col, row_data + col + 1, &rstate); } if (nc != num_cols_) row_data[nc] = static_cast(kaldi::RandGauss(&rstate)); } } template void MatrixBase::SetRandUniform() { kaldi::RandomState rstate; for (MatrixIndexT row = 0; row < num_rows_; row++) { Real *row_data = this->RowData(row); for (MatrixIndexT col = 0; col < num_cols_; col++, row_data++) { *row_data = static_cast(kaldi::RandUniform(&rstate)); } } } */ template void MatrixBase::Write(std::ostream &os, bool binary) const { if (!os.good()) { KALDI_ERR << "Failed to write matrix to stream: stream not good"; } if (binary) { // Use separate binary and text formats, // since in binary mode we need to know if it's float or double. std::string my_token = (sizeof(Real) == 4 ? "FM" : "DM"); WriteToken(os, binary, my_token); { int32 rows = this->num_rows_; // make the size 32-bit on disk. int32 cols = this->num_cols_; KALDI_ASSERT(this->num_rows_ == (MatrixIndexT) rows); KALDI_ASSERT(this->num_cols_ == (MatrixIndexT) cols); WriteBasicType(os, binary, rows); WriteBasicType(os, binary, cols); } if (Stride() == NumCols()) os.write(reinterpret_cast (Data()), sizeof(Real) * static_cast(num_rows_) * static_cast(num_cols_)); else for (MatrixIndexT i = 0; i < num_rows_; i++) os.write(reinterpret_cast (RowData(i)), sizeof(Real) * num_cols_); if (!os.good()) { KALDI_ERR << "Failed to write matrix to stream"; } } else { // text mode. if (num_cols_ == 0) { os << " [ ]\n"; } else { os << " ["; for (MatrixIndexT i = 0; i < num_rows_; i++) { os << "\n "; for (MatrixIndexT j = 0; j < num_cols_; j++) os << (*this)(i, j) << " "; } os << "]\n"; } } } template void MatrixBase::Read(std::istream & is, bool binary) { // In order to avoid rewriting this, we just declare a Matrix and // use it to read the data, then copy. Matrix tmp; tmp.Read(is, binary); if (tmp.NumRows() != NumRows() || tmp.NumCols() != NumCols()) { KALDI_ERR << "MatrixBase::Read, size mismatch " << NumRows() << " x " << NumCols() << " versus " << tmp.NumRows() << " x " << tmp.NumCols(); } CopyFromMat(tmp); } template void Matrix::Read(std::istream & is, bool binary) { // now assume add == false. MatrixIndexT pos_at_start = is.tellg(); std::ostringstream specific_error; if (binary) { // Read in binary mode. int peekval = Peek(is, binary); if (peekval == 'C') { // This code enables us to read CompressedMatrix as a regular matrix. 
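      // Note: the CompressedMatrix handling below is commented out in this
      // copy of the file, so when a 'C' header is seen the function just
      // returns without filling *this.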
//CompressedMatrix compressed_mat; //compressed_mat.Read(is, binary); // at this point, add == false. //this->Resize(compressed_mat.NumRows(), compressed_mat.NumCols()); //compressed_mat.CopyToMat(this); return; } const char *my_token = (sizeof(Real) == 4 ? "FM" : "DM"); char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F'); if (peekval == other_token_start) { // need to instantiate the other type to read it. typedef typename OtherReal::Real OtherType; // if Real == float, OtherType == double, and vice versa. Matrix other(this->num_rows_, this->num_cols_); other.Read(is, binary); // add is false at this point anyway. this->Resize(other.NumRows(), other.NumCols()); this->CopyFromMat(other); return; } std::string token; ReadToken(is, binary, &token); if (token != my_token) { if (token.length() > 20) token = token.substr(0, 17) + "..."; specific_error << ": Expected token " << my_token << ", got " << token; goto bad; } int32 rows, cols; ReadBasicType(is, binary, &rows); // throws on error. ReadBasicType(is, binary, &cols); // throws on error. if ((MatrixIndexT)rows != this->num_rows_ || (MatrixIndexT)cols != this->num_cols_) { this->Resize(rows, cols); } if (this->Stride() == this->NumCols() && rows*cols!=0) { is.read(reinterpret_cast(this->Data()), sizeof(Real)*rows*cols); if (is.fail()) goto bad; } else { for (MatrixIndexT i = 0; i < (MatrixIndexT)rows; i++) { is.read(reinterpret_cast(this->RowData(i)), sizeof(Real)*cols); if (is.fail()) goto bad; } } if (is.eof()) return; if (is.fail()) goto bad; return; } else { // Text mode. std::string str; is >> str; // get a token if (is.fail()) { specific_error << ": Expected \"[\", got EOF"; goto bad; } // if ((str.compare("DM") == 0) || (str.compare("FM") == 0)) { // Back compatibility. // is >> str; // get #rows // is >> str; // get #cols // is >> str; // get "[" // } if (str == "[]") { Resize(0, 0); return; } // Be tolerant of variants. else if (str != "[") { if (str.length() > 20) str = str.substr(0, 17) + "..."; specific_error << ": Expected \"[\", got \"" << str << '"'; goto bad; } // At this point, we have read "[". std::vector* > data; std::vector *cur_row = new std::vector; while (1) { int i = is.peek(); if (i == -1) { specific_error << "Got EOF while reading matrix data"; goto cleanup; } else if (static_cast(i) == ']') { // Finished reading matrix. is.get(); // eat the "]". i = is.peek(); if (static_cast(i) == '\r') { is.get(); is.get(); // get \r\n (must eat what we wrote) } else if (static_cast(i) == '\n') { is.get(); } // get \n (must eat what we wrote) if (is.fail()) { KALDI_WARN << "After end of matrix data, read error."; // we got the data we needed, so just warn for this error. } // Now process the data. if (!cur_row->empty()) data.push_back(cur_row); else delete(cur_row); cur_row = NULL; if (data.empty()) { this->Resize(0, 0); return; } else { int32 num_rows = data.size(), num_cols = data[0]->size(); this->Resize(num_rows, num_cols); for (int32 i = 0; i < num_rows; i++) { if (static_cast(data[i]->size()) != num_cols) { specific_error << "Matrix has inconsistent #cols: " << num_cols << " vs." << data[i]->size() << " (processing row" << i << ")"; goto cleanup; } for (int32 j = 0; j < num_cols; j++) (*this)(i, j) = (*(data[i]))[j]; delete data[i]; data[i] = NULL; } } return; } else if (static_cast(i) == '\n' || static_cast(i) == ';') { // End of matrix row. 
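        // Both '\n' and ';' end a row here, so for example
        //   "[ 1 2 3 ; 4 5 6 ]"  and  "[\n 1 2 3\n 4 5 6 ]"
        // parse to the same 2 x 3 matrix.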
is.get(); if (cur_row->size() != 0) { data.push_back(cur_row); cur_row = new std::vector; cur_row->reserve(data.back()->size()); } } else if ( (i >= '0' && i <= '9') || i == '-' ) { // A number... Real r; is >> r; if (is.fail()) { specific_error << "Stream failure/EOF while reading matrix data."; goto cleanup; } cur_row->push_back(r); } else if (isspace(i)) { is.get(); // eat the space and do nothing. } else { // NaN or inf or error. std::string str; is >> str; if (!KALDI_STRCASECMP(str.c_str(), "inf") || !KALDI_STRCASECMP(str.c_str(), "infinity")) { cur_row->push_back(std::numeric_limits::infinity()); KALDI_WARN << "Reading infinite value into matrix."; } else if (!KALDI_STRCASECMP(str.c_str(), "nan")) { cur_row->push_back(std::numeric_limits::quiet_NaN()); KALDI_WARN << "Reading NaN value into matrix."; } else { if (str.length() > 20) str = str.substr(0, 17) + "..."; specific_error << "Expecting numeric matrix data, got " << str; goto cleanup; } } } // Note, we never leave the while () loop before this // line (we return from it.) cleanup: // We only reach here in case of error in the while loop above. if(cur_row != NULL) delete cur_row; for (size_t i = 0; i < data.size(); i++) if(data[i] != NULL) delete data[i]; // and then go on to "bad" below, where we print error. } bad: KALDI_ERR << "Failed to read matrix from stream. " << specific_error.str() << " File position at start is " << pos_at_start << ", currently " << is.tellg(); } // Constructor... note that this is not const-safe as it would // be quite complicated to implement a "const SubMatrix" class that // would not allow its contents to be changed. template SubMatrix::SubMatrix(const MatrixBase &M, const MatrixIndexT ro, const MatrixIndexT r, const MatrixIndexT co, const MatrixIndexT c) { if (r == 0 || c == 0) { // we support the empty sub-matrix as a special case. KALDI_ASSERT(c == 0 && r == 0); this->data_ = NULL; this->num_cols_ = 0; this->num_rows_ = 0; this->stride_ = 0; return; } KALDI_ASSERT(static_cast(ro) < static_cast(M.num_rows_) && static_cast(co) < static_cast(M.num_cols_) && static_cast(r) <= static_cast(M.num_rows_ - ro) && static_cast(c) <= static_cast(M.num_cols_ - co)); // point to the begining of window MatrixBase::num_rows_ = r; MatrixBase::num_cols_ = c; MatrixBase::stride_ = M.Stride(); MatrixBase::data_ = M.Data_workaround() + static_cast(co) + static_cast(ro) * static_cast(M.Stride()); } template SubMatrix::SubMatrix(Real *data, MatrixIndexT num_rows, MatrixIndexT num_cols, MatrixIndexT stride): MatrixBase(data, num_cols, num_rows, stride) { // caution: reversed order! if (data == NULL) { KALDI_ASSERT(num_rows * num_cols == 0); this->num_rows_ = 0; this->num_cols_ = 0; this->stride_ = 0; } else { KALDI_ASSERT(this->stride_ >= this->num_cols_); } } /* template void MatrixBase::Add(const Real alpha) { Real *data = data_; MatrixIndexT stride = stride_; for (MatrixIndexT r = 0; r < num_rows_; r++) for (MatrixIndexT c = 0; c < num_cols_; c++) data[c + stride*r] += alpha; } template void MatrixBase::AddToDiag(const Real alpha) { Real *data = data_; MatrixIndexT this_stride = stride_ + 1, num_to_add = std::min(num_rows_, num_cols_); for (MatrixIndexT r = 0; r < num_to_add; r++) data[r * this_stride] += alpha; } template Real MatrixBase::Cond() const { KALDI_ASSERT(num_rows_ > 0&&num_cols_ > 0); Vector singular_values(std::min(num_rows_, num_cols_)); Svd(&singular_values); // Get singular values... Real min = singular_values(0), max = singular_values(0); // both absolute values... 
for (MatrixIndexT i = 1;i < singular_values.Dim();i++) { min = std::min((Real)std::abs(singular_values(i)), min); max = std::max((Real)std::abs(singular_values(i)), max); } if (min > 0) return max/min; else return std::numeric_limits::infinity(); } template Real MatrixBase::Trace(bool check_square) const { KALDI_ASSERT(!check_square || num_rows_ == num_cols_); Real ans = 0.0; for (MatrixIndexT r = 0;r < std::min(num_rows_, num_cols_);r++) ans += data_ [r + stride_*r]; return ans; } template Real MatrixBase::Max() const { KALDI_ASSERT(num_rows_ > 0 && num_cols_ > 0); Real ans= *data_; for (MatrixIndexT r = 0; r < num_rows_; r++) for (MatrixIndexT c = 0; c < num_cols_; c++) if (data_[c + stride_*r] > ans) ans = data_[c + stride_*r]; return ans; } template Real MatrixBase::Min() const { KALDI_ASSERT(num_rows_ > 0 && num_cols_ > 0); Real ans= *data_; for (MatrixIndexT r = 0; r < num_rows_; r++) for (MatrixIndexT c = 0; c < num_cols_; c++) if (data_[c + stride_*r] < ans) ans = data_[c + stride_*r]; return ans; } template void MatrixBase::AddMatMatMat(Real alpha, const MatrixBase &A, MatrixTransposeType transA, const MatrixBase &B, MatrixTransposeType transB, const MatrixBase &C, MatrixTransposeType transC, Real beta) { // Note on time taken with different orders of computation. Assume not transposed in this / // discussion. Firstly, normalize expressions using A.NumCols == B.NumRows and B.NumCols == C.NumRows, prefer // rows where there is a choice. // time taken for (AB) is: A.NumRows*B.NumRows*C.Rows // time taken for (AB)C is A.NumRows*C.NumRows*C.Cols // so this order is A.NumRows*B.NumRows*C.NumRows + A.NumRows*C.NumRows*C.NumCols. // time taken for (BC) is: B.NumRows*C.NumRows*C.Cols // time taken for A(BC) is: A.NumRows*B.NumRows*C.Cols // so this order is B.NumRows*C.NumRows*C.NumCols + A.NumRows*B.NumRows*C.Cols MatrixIndexT ARows = A.num_rows_, ACols = A.num_cols_, BRows = B.num_rows_, BCols = B.num_cols_, CRows = C.num_rows_, CCols = C.num_cols_; if (transA == kTrans) std::swap(ARows, ACols); if (transB == kTrans) std::swap(BRows, BCols); if (transC == kTrans) std::swap(CRows, CCols); MatrixIndexT AB_C_time = ARows*BRows*CRows + ARows*CRows*CCols; MatrixIndexT A_BC_time = BRows*CRows*CCols + ARows*BRows*CCols; if (AB_C_time < A_BC_time) { Matrix AB(ARows, BCols); AB.AddMatMat(1.0, A, transA, B, transB, 0.0); // AB = A * B. (*this).AddMatMat(alpha, AB, kNoTrans, C, transC, beta); } else { Matrix BC(BRows, CCols); BC.AddMatMat(1.0, B, transB, C, transC, 0.0); // BC = B * C. (*this).AddMatMat(alpha, A, transA, BC, kNoTrans, beta); } } template void MatrixBase::DestructiveSvd(VectorBase *s, MatrixBase *U, MatrixBase *Vt) { // Svd, *this = U*diag(s)*Vt. // With (*this).num_rows_ == m, (*this).num_cols_ == n, // Support only skinny Svd with m>=n (NumRows>=NumCols), and zero sizes for U and Vt mean // we do not want that output. We expect that s.Dim() == m, // U is either 0 by 0 or m by n, and rv is either 0 by 0 or n by n. // Throws exception on error. KALDI_ASSERT(num_rows_>=num_cols_ && "Svd requires that #rows by >= #cols."); // For compatibility with JAMA code. KALDI_ASSERT(s->Dim() == num_cols_); // s should be the smaller dim. KALDI_ASSERT(U == NULL || (U->num_rows_ == num_rows_&&U->num_cols_ == num_cols_)); KALDI_ASSERT(Vt == NULL || (Vt->num_rows_ == num_cols_&&Vt->num_cols_ == num_cols_)); Real prescale = 1.0; if ( std::abs((*this)(0, 0) ) < 1.0e-30) { // Very tiny value... can cause problems in Svd. 
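// The block below rescales the whole matrix by 1/max_elem before running the
// SVD, to avoid numerical trouble when the leading element is extremely small;
// the "s->Scale(1.0/prescale)" line further down (commented out here) is what
// undoes that scaling on the singular values.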
Real max_elem = LargestAbsElem(); if (max_elem != 0) { prescale = 1.0 / max_elem; if (std::abs(prescale) == std::numeric_limits::infinity()) { prescale = 1.0e+40; } (*this).Scale(prescale); } } #if !defined(HAVE_ATLAS) && !defined(USE_KALDI_SVD) // "S" == skinny Svd (only one we support because of compatibility with Jama one which is only skinny), // "N"== no eigenvectors wanted. LapackGesvd(s, U, Vt); #else /* if (num_rows_ > 1 && num_cols_ > 1 && (*this)(0, 0) == (*this)(1, 1) && Max() == Min() && (*this)(0, 0) != 0.0) { // special case that JamaSvd sometimes crashes on. KALDI_WARN << "Jama SVD crashes on this type of matrix, perturbing it to prevent crash."; for(int32 i = 0; i < NumRows(); i++) (*this)(i, i) *= 1.00001; }*/ // bool ans = JamaSvd(s, U, Vt); //if (Vt != NULL) Vt->Transpose(); // possibly to do: change this and also the transpose inside the JamaSvd routine. note, Vt is square. //if (!ans) { //KALDI_ERR << "Error doing Svd"; // This one will be caught. //} //#endif //if (prescale != 1.0) s->Scale(1.0/prescale); //} /* template void MatrixBase::Svd(VectorBase *s, MatrixBase *U, MatrixBase *Vt) const { try { if (num_rows_ >= num_cols_) { Matrix tmp(*this); tmp.DestructiveSvd(s, U, Vt); } else { Matrix tmp(*this, kTrans); // transpose of *this. // rVt will have different dim so cannot transpose in-place --> use a temp matrix. Matrix Vt_Trans(Vt ? Vt->num_cols_ : 0, Vt ? Vt->num_rows_ : 0); // U will be transpose tmp.DestructiveSvd(s, Vt ? &Vt_Trans : NULL, U); if (U) U->Transpose(); if (Vt) Vt->CopyFromMat(Vt_Trans, kTrans); // copy with transpose. } } catch (...) { KALDI_ERR << "Error doing Svd (did not converge), first part of matrix is\n" << SubMatrix(*this, 0, std::min((MatrixIndexT)10, num_rows_), 0, std::min((MatrixIndexT)10, num_cols_)) << ", min and max are: " << Min() << ", " << Max(); } } template bool MatrixBase::IsSymmetric(Real cutoff) const { MatrixIndexT R = num_rows_, C = num_cols_; if (R != C) return false; Real bad_sum = 0.0, good_sum = 0.0; for (MatrixIndexT i = 0;i < R;i++) { for (MatrixIndexT j = 0;j < i;j++) { Real a = (*this)(i, j), b = (*this)(j, i), avg = 0.5*(a+b), diff = 0.5*(a-b); good_sum += std::abs(avg); bad_sum += std::abs(diff); } good_sum += std::abs((*this)(i, i)); } if (bad_sum > cutoff*good_sum) return false; return true; } template bool MatrixBase::IsDiagonal(Real cutoff) const{ MatrixIndexT R = num_rows_, C = num_cols_; Real bad_sum = 0.0, good_sum = 0.0; for (MatrixIndexT i = 0;i < R;i++) { for (MatrixIndexT j = 0;j < C;j++) { if (i == j) good_sum += std::abs((*this)(i, j)); else bad_sum += std::abs((*this)(i, j)); } } return (!(bad_sum > good_sum * cutoff)); } // This does nothing, it's designed to trigger Valgrind errors // if any memory is uninitialized. 
template void MatrixBase::TestUninitialized() const { MatrixIndexT R = num_rows_, C = num_cols_, positive = 0; for (MatrixIndexT i = 0; i < R; i++) for (MatrixIndexT j = 0; j < C; j++) if ((*this)(i, j) > 0.0) positive++; if (positive > R * C) KALDI_ERR << "Error...."; } template bool MatrixBase::IsUnit(Real cutoff) const { MatrixIndexT R = num_rows_, C = num_cols_; Real bad_max = 0.0; for (MatrixIndexT i = 0; i < R;i++) for (MatrixIndexT j = 0; j < C;j++) bad_max = std::max(bad_max, static_cast(std::abs( (*this)(i, j) - (i == j?1.0:0.0)))); return (bad_max <= cutoff); } template bool MatrixBase::IsZero(Real cutoff)const { MatrixIndexT R = num_rows_, C = num_cols_; Real bad_max = 0.0; for (MatrixIndexT i = 0;i < R;i++) for (MatrixIndexT j = 0;j < C;j++) bad_max = std::max(bad_max, static_cast(std::abs( (*this)(i, j) ))); return (bad_max <= cutoff); } template Real MatrixBase::FrobeniusNorm() const{ return std::sqrt(TraceMatMat(*this, *this, kTrans)); } template bool MatrixBase::ApproxEqual(const MatrixBase &other, float tol) const { if (num_rows_ != other.num_rows_ || num_cols_ != other.num_cols_) KALDI_ERR << "ApproxEqual: size mismatch."; Matrix tmp(*this); tmp.AddMat(-1.0, other); return (tmp.FrobeniusNorm() <= static_cast(tol) * this->FrobeniusNorm()); } template bool MatrixBase::Equal(const MatrixBase &other) const { if (num_rows_ != other.num_rows_ || num_cols_ != other.num_cols_) KALDI_ERR << "Equal: size mismatch."; for (MatrixIndexT i = 0; i < num_rows_; i++) for (MatrixIndexT j = 0; j < num_cols_; j++) if ( (*this)(i, j) != other(i, j)) return false; return true; } template Real MatrixBase::LargestAbsElem() const{ MatrixIndexT R = num_rows_, C = num_cols_; Real largest = 0.0; for (MatrixIndexT i = 0;i < R;i++) for (MatrixIndexT j = 0;j < C;j++) largest = std::max(largest, (Real)std::abs((*this)(i, j))); return largest; } template void MatrixBase::OrthogonalizeRows() { KALDI_ASSERT(NumRows() <= NumCols()); MatrixIndexT num_rows = num_rows_; for (MatrixIndexT i = 0; i < num_rows; i++) { int32 counter = 0; while (1) { Real start_prod = VecVec(this->Row(i), this->Row(i)); if (start_prod - start_prod != 0.0 || start_prod == 0.0) { KALDI_WARN << "Self-product of row " << i << " of matrix is " << start_prod << ", randomizing."; this->Row(i).SetRandn(); counter++; continue; // loop again. } for (MatrixIndexT j = 0; j < i; j++) { Real prod = VecVec(this->Row(i), this->Row(j)); this->Row(i).AddVec(-prod, this->Row(j)); } Real end_prod = VecVec(this->Row(i), this->Row(i)); if (end_prod <= 0.01 * start_prod) { // We removed // almost all of the vector during orthogonalization, // so we have reason to doubt (for roundoff reasons) // that it's still orthogonal to the other vectors. // We need to orthogonalize again. if (end_prod == 0.0) { // Row is exactly zero: // generate random direction. this->Row(i).SetRandn(); } counter++; if (counter > 100) KALDI_ERR << "Loop detected while orthogalizing matrix."; } else { this->Row(i).Scale(1.0 / std::sqrt(end_prod)); break; } } } } // Uses Svd to compute the eigenvalue decomposition of a symmetric positive semidefinite // matrix: // (*this) = rU * diag(rs) * rU^T, with rU an orthogonal matrix so rU^{-1} = rU^T. // Does this by computing svd (*this) = U diag(rs) V^T ... answer is just U diag(rs) U^T. // Throws exception if this failed to within supplied precision (typically because *this was not // symmetric positive definite). template void MatrixBase::SymPosSemiDefEig(VectorBase *rs, MatrixBase *rU, Real check_thresh) // e.g. 
check_thresh = 0.001 { const MatrixIndexT D = num_rows_; KALDI_ASSERT(num_rows_ == num_cols_); KALDI_ASSERT(IsSymmetric() && "SymPosSemiDefEig: expecting input to be symmetrical."); KALDI_ASSERT(rU->num_rows_ == D && rU->num_cols_ == D && rs->Dim() == D); Matrix Vt(D, D); Svd(rs, rU, &Vt); // First just zero any singular values if the column of U and V do not have +ve dot product-- // this may mean we have small negative eigenvalues, and if we zero them the result will be closer to correct. for (MatrixIndexT i = 0;i < D;i++) { Real sum = 0.0; for (MatrixIndexT j = 0;j < D;j++) sum += (*rU)(j, i) * Vt(i, j); if (sum < 0.0) (*rs)(i) = 0.0; } { Matrix tmpU(*rU); Vector tmps(*rs); tmps.ApplyPow(0.5); tmpU.MulColsVec(tmps); SpMatrix tmpThis(D); tmpThis.AddMat2(1.0, tmpU, kNoTrans, 0.0); Matrix tmpThisFull(tmpThis); float new_norm = tmpThisFull.FrobeniusNorm(); float old_norm = (*this).FrobeniusNorm(); tmpThisFull.AddMat(-1.0, (*this)); if (!(old_norm == 0 && new_norm == 0)) { float diff_norm = tmpThisFull.FrobeniusNorm(); if (std::abs(new_norm-old_norm) > old_norm*check_thresh || diff_norm > old_norm*check_thresh) { KALDI_WARN << "SymPosSemiDefEig seems to have failed " << diff_norm << " !<< " << check_thresh << "*" << old_norm << ", maybe matrix was not " << "positive semi definite. Continuing anyway."; } } } } template Real MatrixBase::LogDet(Real *det_sign) const { Real log_det; Matrix tmp(*this); tmp.Invert(&log_det, det_sign, false); // false== output not needed (saves some computation). return log_det; } template void MatrixBase::InvertDouble(Real *log_det, Real *det_sign, bool inverse_needed) { double log_det_tmp, det_sign_tmp; Matrix dmat(*this); dmat.Invert(&log_det_tmp, &det_sign_tmp, inverse_needed); if (inverse_needed) (*this).CopyFromMat(dmat); if (log_det) *log_det = log_det_tmp; if (det_sign) *det_sign = det_sign_tmp; } */ //template //void MatrixBase::CopyFromMat(const CompressedMatrix &mat) { //mat.CopyToMat(this); //} //template //Matrix::Matrix(const CompressedMatrix &M): MatrixBase() { //Resize(M.NumRows(), M.NumCols(), kUndefined); //M.CopyToMat(this); //} template void MatrixBase::InvertElements() { for (MatrixIndexT r = 0; r < num_rows_; r++) { for (MatrixIndexT c = 0; c < num_cols_; c++) { (*this)(r, c) = static_cast(1.0 / (*this)(r, c)); } } } /* template void MatrixBase::Transpose() { KALDI_ASSERT(num_rows_ == num_cols_); MatrixIndexT M = num_rows_; for (MatrixIndexT i = 0;i < M;i++) for (MatrixIndexT j = 0;j < i;j++) { Real &a = (*this)(i, j), &b = (*this)(j, i); std::swap(a, b); } } template void Matrix::Transpose() { if (this->num_rows_ != this->num_cols_) { Matrix tmp(*this, kTrans); Resize(this->num_cols_, this->num_rows_); this->CopyFromMat(tmp); } else { (static_cast&>(*this)).Transpose(); } } template void MatrixBase::Heaviside(const MatrixBase &src) { KALDI_ASSERT(SameDim(*this, src)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_; Real *row_data = data_; const Real *src_row_data = src.Data(); for (MatrixIndexT row = 0; row < num_rows; row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col++) row_data[col] = (src_row_data[col] > 0 ? 
1.0 : 0.0); } } template void MatrixBase::Exp(const MatrixBase &src) { KALDI_ASSERT(SameDim(*this, src)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_; Real *row_data = data_; const Real *src_row_data = src.Data(); for (MatrixIndexT row = 0; row < num_rows; row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col++) row_data[col] = kaldi::Exp(src_row_data[col]); } } template void MatrixBase::Pow(const MatrixBase &src, Real power) { KALDI_ASSERT(SameDim(*this, src)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_; Real *row_data = data_; const Real *src_row_data = src.Data(); for (MatrixIndexT row = 0; row < num_rows; row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col++) { row_data[col] = pow(src_row_data[col], power); } } } template void MatrixBase::PowAbs(const MatrixBase &src, Real power, bool include_sign) { KALDI_ASSERT(SameDim(*this, src)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_; Real *row_data = data_; const Real *src_row_data = src.Data(); for (MatrixIndexT row = 0; row < num_rows; row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col ++) { if (include_sign == true && src_row_data[col] < 0) { row_data[col] = -pow(std::abs(src_row_data[col]), power); } else { row_data[col] = pow(std::abs(src_row_data[col]), power); } } } } template void MatrixBase::Floor(const MatrixBase &src, Real floor_val) { KALDI_ASSERT(SameDim(*this, src)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_; Real *row_data = data_; const Real *src_row_data = src.Data(); for (MatrixIndexT row = 0; row < num_rows; row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col++) row_data[col] = (src_row_data[col] < floor_val ? floor_val : src_row_data[col]); } } template void MatrixBase::Ceiling(const MatrixBase &src, Real ceiling_val) { KALDI_ASSERT(SameDim(*this, src)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_; Real *row_data = data_; const Real *src_row_data = src.Data(); for (MatrixIndexT row = 0; row < num_rows; row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col++) row_data[col] = (src_row_data[col] > ceiling_val ? ceiling_val : src_row_data[col]); } } template void MatrixBase::Log(const MatrixBase &src) { KALDI_ASSERT(SameDim(*this, src)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_; Real *row_data = data_; const Real *src_row_data = src.Data(); for (MatrixIndexT row = 0; row < num_rows; row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col++) row_data[col] = kaldi::Log(src_row_data[col]); } } template void MatrixBase::ExpSpecial(const MatrixBase &src) { KALDI_ASSERT(SameDim(*this, src)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_; Real *row_data = data_; const Real *src_row_data = src.Data(); for (MatrixIndexT row = 0; row < num_rows; row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col++) row_data[col] = (src_row_data[col] < Real(0) ? 
kaldi::Exp(src_row_data[col]) : (src_row_data[col] + Real(1))); } } template void MatrixBase::ExpLimited(const MatrixBase &src, Real lower_limit, Real upper_limit) { KALDI_ASSERT(SameDim(*this, src)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_; Real *row_data = data_; const Real *src_row_data = src.Data(); for (MatrixIndexT row = 0; row < num_rows; row++,row_data += stride_, src_row_data += src.stride_) { for (MatrixIndexT col = 0; col < num_cols; col++) { const Real x = src_row_data[col]; if (!(x >= lower_limit)) row_data[col] = kaldi::Exp(lower_limit); else if (x > upper_limit) row_data[col] = kaldi::Exp(upper_limit); else row_data[col] = kaldi::Exp(x); } } } template bool MatrixBase::Power(Real power) { KALDI_ASSERT(num_rows_ > 0 && num_rows_ == num_cols_); MatrixIndexT n = num_rows_; Matrix P(n, n); Vector re(n), im(n); this->Eig(&P, &re, &im); // Now attempt to take the complex eigenvalues to this power. for (MatrixIndexT i = 0; i < n; i++) if (!AttemptComplexPower(&(re(i)), &(im(i)), power)) return false; // e.g. real and negative, or zero, eigenvalues. Matrix D(n, n); // D to the power. CreateEigenvalueMatrix(re, im, &D); Matrix tmp(n, n); // P times D tmp.AddMatMat(1.0, P, kNoTrans, D, kNoTrans, 0.0); // tmp := P*D P.Invert(); // next line is: *this = tmp * P^{-1} = P * D * P^{-1} (*this).AddMatMat(1.0, tmp, kNoTrans, P, kNoTrans, 0.0); return true; } */ template void Matrix::Swap(Matrix *other) { std::swap(this->data_, other->data_); std::swap(this->num_cols_, other->num_cols_); std::swap(this->num_rows_, other->num_rows_); std::swap(this->stride_, other->stride_); } /* // Repeating this comment that appeared in the header: // Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D // P^{-1}. Be careful: the relationship of D to the eigenvalues we output is // slightly complicated, due to the need for P to be real. In the symmetric // case D is diagonal and real, but in // the non-symmetric case there may be complex-conjugate pairs of eigenvalues. // In this case, for the equation (*this) = P D P^{-1} to hold, D must actually // be block diagonal, with 2x2 blocks corresponding to any such pairs. If a // pair is lambda +- i*mu, D will have a corresponding 2x2 block // [lambda, mu; -mu, lambda]. // Note that if the input matrix (*this) is non-invertible, P may not be invertible // so in this case instead of the equation (*this) = P D P^{-1} holding, we have // instead (*this) P = P D. // // By making the pointer arguments non-NULL or NULL, the user can choose to take // not to take the eigenvalues directly, and/or the matrix D which is block-diagonal // with 2x2 blocks. template void MatrixBase::Eig(MatrixBase *P, VectorBase *r, VectorBase *i) const { EigenvalueDecomposition eig(*this); if (P) eig.GetV(P); if (r) eig.GetRealEigenvalues(r); if (i) eig.GetImagEigenvalues(i); } // Begin non-member function definitions. // /** // * @brief Extension of the HTK header // */ // struct HtkHeaderExt // { // INT_32 mHeaderSize; // INT_32 mVersion; // INT_32 mSampSize; // }; /* template bool ReadHtk(std::istream &is, Matrix *M_ptr, HtkHeader *header_ptr) { // check instantiated with double or float. KALDI_ASSERT_IS_FLOATING_TYPE(Real); Matrix &M = *M_ptr; HtkHeader htk_hdr; // TODO(arnab): this fails if the HTK file has CRC cheksum or is compressed. is.read((char*)&htk_hdr, sizeof(htk_hdr)); // we're being really POSIX here! 
if (is.fail()) { KALDI_WARN << "Could not read header from HTK feature file "; return false; } KALDI_SWAP4(htk_hdr.mNSamples); KALDI_SWAP4(htk_hdr.mSamplePeriod); KALDI_SWAP2(htk_hdr.mSampleSize); KALDI_SWAP2(htk_hdr.mSampleKind); bool has_checksum = false; { // See HParm.h in HTK code for sources of these things. enum BaseParmKind{ Waveform, Lpc, Lprefc, Lpcepstra, Lpdelcep, Irefc, Mfcc, Fbank, Melspec, User, Discrete, Plp, Anon }; const int32 IsCompressed = 02000, HasChecksum = 010000, HasVq = 040000, Problem = IsCompressed | HasVq; int32 base_parm = htk_hdr.mSampleKind & (077); has_checksum = (base_parm & HasChecksum) != 0; htk_hdr.mSampleKind &= ~HasChecksum; // We don't support writing with // checksum so turn it off. if (htk_hdr.mSampleKind & Problem) KALDI_ERR << "Code to read HTK features does not support compressed " "features, or features with VQ."; if (base_parm == Waveform || base_parm == Irefc || base_parm == Discrete) KALDI_ERR << "Attempting to read HTK features from unsupported type " "(e.g. waveform or discrete features."; } KALDI_VLOG(3) << "HTK header: Num Samples: " << htk_hdr.mNSamples << "; Sample period: " << htk_hdr.mSamplePeriod << "; Sample size: " << htk_hdr.mSampleSize << "; Sample kind: " << htk_hdr.mSampleKind; M.Resize(htk_hdr.mNSamples, htk_hdr.mSampleSize / sizeof(float)); MatrixIndexT i; MatrixIndexT j; if (sizeof(Real) == sizeof(float)) { for (i = 0; i< M.NumRows(); i++) { is.read((char*)M.RowData(i), sizeof(float)*M.NumCols()); if (is.fail()) { KALDI_WARN << "Could not read data from HTK feature file "; return false; } if (MachineIsLittleEndian()) { MatrixIndexT C = M.NumCols(); for (j = 0; j < C; j++) { KALDI_SWAP4((M(i, j))); // The HTK standard is big-endian! } } } } else { float *pmem = new float[M.NumCols()]; for (i = 0; i < M.NumRows(); i++) { is.read((char*)pmem, sizeof(float)*M.NumCols()); if (is.fail()) { KALDI_WARN << "Could not read data from HTK feature file "; delete [] pmem; return false; } MatrixIndexT C = M.NumCols(); for (j = 0; j < C; j++) { if (MachineIsLittleEndian()) // HTK standard is big-endian! KALDI_SWAP4(pmem[j]); M(i, j) = static_cast(pmem[j]); } } delete [] pmem; } if (header_ptr) *header_ptr = htk_hdr; if (has_checksum) { int16 checksum; is.read((char*)&checksum, sizeof(checksum)); if (is.fail()) KALDI_WARN << "Could not read checksum from HTK feature file "; // We ignore the checksum. } return true; } template bool ReadHtk(std::istream &is, Matrix *M, HtkHeader *header_ptr); template bool ReadHtk(std::istream &is, Matrix *M, HtkHeader *header_ptr); template bool WriteHtk(std::ostream &os, const MatrixBase &M, HtkHeader htk_hdr) // header may be derived from a previous call to ReadHtk. Must be in binary mode. { KALDI_ASSERT(M.NumRows() == static_cast(htk_hdr.mNSamples)); KALDI_ASSERT(M.NumCols() == static_cast(htk_hdr.mSampleSize) / static_cast(sizeof(float))); KALDI_SWAP4(htk_hdr.mNSamples); KALDI_SWAP4(htk_hdr.mSamplePeriod); KALDI_SWAP2(htk_hdr.mSampleSize); KALDI_SWAP2(htk_hdr.mSampleKind); os.write((char*)&htk_hdr, sizeof(htk_hdr)); if (os.fail()) goto bad; MatrixIndexT i; MatrixIndexT j; if (sizeof(Real) == sizeof(float) && !MachineIsLittleEndian()) { for (i = 0; i< M.NumRows(); i++) { // Unlikely to reach here ever! 
os.write((char*)M.RowData(i), sizeof(float)*M.NumCols()); if (os.fail()) goto bad; } } else { float *pmem = new float[M.NumCols()]; for (i = 0; i < M.NumRows(); i++) { const Real *rowData = M.RowData(i); for (j = 0;j < M.NumCols();j++) pmem[j] = static_cast ( rowData[j] ); if (MachineIsLittleEndian()) for (j = 0;j < M.NumCols();j++) KALDI_SWAP4(pmem[j]); os.write((char*)pmem, sizeof(float)*M.NumCols()); if (os.fail()) { delete [] pmem; goto bad; } } delete [] pmem; } return true; bad: KALDI_WARN << "Could not write to HTK feature file "; return false; } template bool WriteHtk(std::ostream &os, const MatrixBase &M, HtkHeader htk_hdr); template bool WriteHtk(std::ostream &os, const MatrixBase &M, HtkHeader htk_hdr); template bool WriteSphinx(std::ostream &os, const MatrixBase &M) { // CMUSphinx mfc file header contains count of the floats, followed // by the data in float little endian format. int size = M.NumRows() * M.NumCols(); os.write((char*)&size, sizeof(int)); if (os.fail()) goto bad; MatrixIndexT i; MatrixIndexT j; if (sizeof(Real) == sizeof(float) && MachineIsLittleEndian()) { for (i = 0; i< M.NumRows(); i++) { // Unlikely to reach here ever! os.write((char*)M.RowData(i), sizeof(float)*M.NumCols()); if (os.fail()) goto bad; } } else { float *pmem = new float[M.NumCols()]; for (i = 0; i < M.NumRows(); i++) { const Real *rowData = M.RowData(i); for (j = 0;j < M.NumCols();j++) pmem[j] = static_cast ( rowData[j] ); if (!MachineIsLittleEndian()) for (j = 0;j < M.NumCols();j++) KALDI_SWAP4(pmem[j]); os.write((char*)pmem, sizeof(float)*M.NumCols()); if (os.fail()) { delete [] pmem; goto bad; } } delete [] pmem; } return true; bad: KALDI_WARN << "Could not write to Sphinx feature file"; return false; } template bool WriteSphinx(std::ostream &os, const MatrixBase &M); template bool WriteSphinx(std::ostream &os, const MatrixBase &M); template Real TraceMatMatMat(const MatrixBase &A, MatrixTransposeType transA, const MatrixBase &B, MatrixTransposeType transB, const MatrixBase &C, MatrixTransposeType transC) { MatrixIndexT ARows = A.NumRows(), ACols = A.NumCols(), BRows = B.NumRows(), BCols = B.NumCols(), CRows = C.NumRows(), CCols = C.NumCols(); if (transA == kTrans) std::swap(ARows, ACols); if (transB == kTrans) std::swap(BRows, BCols); if (transC == kTrans) std::swap(CRows, CCols); KALDI_ASSERT( CCols == ARows && ACols == BRows && BCols == CRows && "TraceMatMatMat: args have mismatched dimensions."); if (ARows*BCols < std::min(BRows*CCols, CRows*ACols)) { Matrix AB(ARows, BCols); AB.AddMatMat(1.0, A, transA, B, transB, 0.0); // AB = A * B. return TraceMatMat(AB, C, transC); } else if ( BRows*CCols < CRows*ACols) { Matrix BC(BRows, CCols); BC.AddMatMat(1.0, B, transB, C, transC, 0.0); // BC = B * C. 
return TraceMatMat(BC, A, transA); } else { Matrix CA(CRows, ACols); CA.AddMatMat(1.0, C, transC, A, transA, 0.0); // CA = C * A return TraceMatMat(CA, B, transB); } } template float TraceMatMatMat(const MatrixBase &A, MatrixTransposeType transA, const MatrixBase &B, MatrixTransposeType transB, const MatrixBase &C, MatrixTransposeType transC); template double TraceMatMatMat(const MatrixBase &A, MatrixTransposeType transA, const MatrixBase &B, MatrixTransposeType transB, const MatrixBase &C, MatrixTransposeType transC); template Real TraceMatMatMatMat(const MatrixBase &A, MatrixTransposeType transA, const MatrixBase &B, MatrixTransposeType transB, const MatrixBase &C, MatrixTransposeType transC, const MatrixBase &D, MatrixTransposeType transD) { MatrixIndexT ARows = A.NumRows(), ACols = A.NumCols(), BRows = B.NumRows(), BCols = B.NumCols(), CRows = C.NumRows(), CCols = C.NumCols(), DRows = D.NumRows(), DCols = D.NumCols(); if (transA == kTrans) std::swap(ARows, ACols); if (transB == kTrans) std::swap(BRows, BCols); if (transC == kTrans) std::swap(CRows, CCols); if (transD == kTrans) std::swap(DRows, DCols); KALDI_ASSERT( DCols == ARows && ACols == BRows && BCols == CRows && CCols == DRows && "TraceMatMatMat: args have mismatched dimensions."); if (ARows*BCols < std::min(BRows*CCols, std::min(CRows*DCols, DRows*ACols))) { Matrix AB(ARows, BCols); AB.AddMatMat(1.0, A, transA, B, transB, 0.0); // AB = A * B. return TraceMatMatMat(AB, kNoTrans, C, transC, D, transD); } else if ((BRows*CCols) < std::min(CRows*DCols, DRows*ACols)) { Matrix BC(BRows, CCols); BC.AddMatMat(1.0, B, transB, C, transC, 0.0); // BC = B * C. return TraceMatMatMat(BC, kNoTrans, D, transD, A, transA); } else if (CRows*DCols < DRows*ACols) { Matrix CD(CRows, DCols); CD.AddMatMat(1.0, C, transC, D, transD, 0.0); // CD = C * D return TraceMatMatMat(CD, kNoTrans, A, transA, B, transB); } else { Matrix DA(DRows, ACols); DA.AddMatMat(1.0, D, transD, A, transA, 0.0); // DA = D * A return TraceMatMatMat(DA, kNoTrans, B, transB, C, transC); } } template float TraceMatMatMatMat(const MatrixBase &A, MatrixTransposeType transA, const MatrixBase &B, MatrixTransposeType transB, const MatrixBase &C, MatrixTransposeType transC, const MatrixBase &D, MatrixTransposeType transD); template double TraceMatMatMatMat(const MatrixBase &A, MatrixTransposeType transA, const MatrixBase &B, MatrixTransposeType transB, const MatrixBase &C, MatrixTransposeType transC, const MatrixBase &D, MatrixTransposeType transD); template void SortSvd(VectorBase *s, MatrixBase *U, MatrixBase *Vt, bool sort_on_absolute_value) { /// Makes sure the Svd is sorted (from greatest to least absolute value). MatrixIndexT num_singval = s->Dim(); KALDI_ASSERT(U == NULL || U->NumCols() == num_singval); KALDI_ASSERT(Vt == NULL || Vt->NumRows() == num_singval); std::vector > vec(num_singval); // negative because we want revese order. for (MatrixIndexT d = 0; d < num_singval; d++) { Real val = (*s)(d), sort_val = -(sort_on_absolute_value ? 
std::abs(val) : val); vec[d] = std::pair(sort_val, d); } std::sort(vec.begin(), vec.end()); Vector s_copy(*s); for (MatrixIndexT d = 0; d < num_singval; d++) (*s)(d) = s_copy(vec[d].second); if (U != NULL) { Matrix Utmp(*U); MatrixIndexT dim = Utmp.NumRows(); for (MatrixIndexT d = 0; d < num_singval; d++) { MatrixIndexT oldidx = vec[d].second; for (MatrixIndexT e = 0; e < dim; e++) (*U)(e, d) = Utmp(e, oldidx); } } if (Vt != NULL) { Matrix Vttmp(*Vt); for (MatrixIndexT d = 0; d < num_singval; d++) (*Vt).Row(d).CopyFromVec(Vttmp.Row(vec[d].second)); } } template void SortSvd(VectorBase *s, MatrixBase *U, MatrixBase *Vt, bool); template void SortSvd(VectorBase *s, MatrixBase *U, MatrixBase *Vt, bool); template void CreateEigenvalueMatrix(const VectorBase &re, const VectorBase &im, MatrixBase *D) { MatrixIndexT n = re.Dim(); KALDI_ASSERT(im.Dim() == n && D->NumRows() == n && D->NumCols() == n); MatrixIndexT j = 0; D->SetZero(); while (j < n) { if (im(j) == 0) { // Real eigenvalue (*D)(j, j) = re(j); j++; } else { // First of a complex pair KALDI_ASSERT(j+1 < n && ApproxEqual(im(j+1), -im(j)) && ApproxEqual(re(j+1), re(j))); /// if (im(j) < 0.0) KALDI_WARN << "Negative first im part of pair"; // TEMP Real lambda = re(j), mu = im(j); // create 2x2 block [lambda, mu; -mu, lambda] (*D)(j, j) = lambda; (*D)(j, j+1) = mu; (*D)(j+1, j) = -mu; (*D)(j+1, j+1) = lambda; j += 2; } } } template void CreateEigenvalueMatrix(const VectorBase &re, const VectorBase &im, MatrixBase *D); template void CreateEigenvalueMatrix(const VectorBase &re, const VectorBase &im, MatrixBase *D); template bool AttemptComplexPower(Real *x_re, Real *x_im, Real power) { // Used in Matrix::Power(). // Attempts to take the complex value x to the power "power", // assuming that power is fractional (i.e. we don't treat integers as a // special case). Returns false if this is not possible, either // because x is negative and real (hence there is no obvious answer // that is "closest to 1", and anyway this case does not make sense // in the Matrix::Power() routine); // or because power is negative, and x is zero. // First solve for r and theta in // x_re = r*cos(theta), x_im = r*sin(theta) if (*x_re < 0.0 && *x_im == 0.0) return false; // can't do // it for negative real values. Real r = std::sqrt((*x_re * *x_re) + (*x_im * *x_im)); // r == radius. if (power < 0.0 && r == 0.0) return false; Real theta = std::atan2(*x_im, *x_re); // Take the power. 
r = std::pow(r, power); theta *= power; *x_re = r * std::cos(theta); *x_im = r * std::sin(theta); return true; } template bool AttemptComplexPower(float *x_re, float *x_im, float power); template bool AttemptComplexPower(double *x_re, double *x_im, double power); template Real TraceMatMat(const MatrixBase &A, const MatrixBase &B, MatrixTransposeType trans) { // tr(A B), equivalent to sum of each element of A times same element in B' MatrixIndexT aStride = A.stride_, bStride = B.stride_; if (trans == kNoTrans) { KALDI_ASSERT(A.NumRows() == B.NumCols() && A.NumCols() == B.NumRows()); Real ans = 0.0; Real *adata = A.data_, *bdata = B.data_; MatrixIndexT arows = A.NumRows(), acols = A.NumCols(); for (MatrixIndexT row = 0;row < arows;row++, adata+=aStride, bdata++) ans += cblas_Xdot(acols, adata, 1, bdata, bStride); return ans; } else { KALDI_ASSERT(A.NumRows() == B.NumRows() && A.NumCols() == B.NumCols()); Real ans = 0.0; Real *adata = A.data_, *bdata = B.data_; MatrixIndexT arows = A.NumRows(), acols = A.NumCols(); for (MatrixIndexT row = 0;row < arows;row++, adata+=aStride, bdata+=bStride) ans += cblas_Xdot(acols, adata, 1, bdata, 1); return ans; } } // Instantiate the template above for float and double. template float TraceMatMat(const MatrixBase &A, const MatrixBase &B, MatrixTransposeType trans); template double TraceMatMat(const MatrixBase &A, const MatrixBase &B, MatrixTransposeType trans); template Real MatrixBase::LogSumExp(Real prune) const { Real sum; if (sizeof(sum) == 8) sum = kLogZeroDouble; else sum = kLogZeroFloat; Real max_elem = Max(), cutoff; if (sizeof(Real) == 4) cutoff = max_elem + kMinLogDiffFloat; else cutoff = max_elem + kMinLogDiffDouble; if (prune > 0.0 && max_elem - prune > cutoff) // explicit pruning... cutoff = max_elem - prune; double sum_relto_max_elem = 0.0; for (MatrixIndexT i = 0; i < num_rows_; i++) { for (MatrixIndexT j = 0; j < num_cols_; j++) { BaseFloat f = (*this)(i, j); if (f >= cutoff) sum_relto_max_elem += kaldi::Exp(f - max_elem); } } return max_elem + kaldi::Log(sum_relto_max_elem); } template Real MatrixBase::ApplySoftMax() { Real max = this->Max(), sum = 0.0; // the 'max' helps to get in good numeric range. for (MatrixIndexT i = 0; i < num_rows_; i++) for (MatrixIndexT j = 0; j < num_cols_; j++) sum += ((*this)(i, j) = kaldi::Exp((*this)(i, j) - max)); this->Scale(1.0 / sum); return max + kaldi::Log(sum); } template void MatrixBase::Tanh(const MatrixBase &src) { KALDI_ASSERT(SameDim(*this, src)); if (num_cols_ == stride_ && src.num_cols_ == src.stride_) { SubVector src_vec(src.data_, num_rows_ * num_cols_), dst_vec(this->data_, num_rows_ * num_cols_); dst_vec.Tanh(src_vec); } else { for (MatrixIndexT r = 0; r < num_rows_; r++) { SubVector src_vec(src, r), dest_vec(*this, r); dest_vec.Tanh(src_vec); } } } template void MatrixBase::SoftHinge(const MatrixBase &src) { KALDI_ASSERT(SameDim(*this, src)); int32 num_rows = num_rows_, num_cols = num_cols_; for (MatrixIndexT r = 0; r < num_rows; r++) { Real *row_data = this->RowData(r); const Real *src_row_data = src.RowData(r); for (MatrixIndexT c = 0; c < num_cols; c++) { Real x = src_row_data[c], y; if (x > 10.0) y = x; // avoid exponentiating large numbers; function // approaches y=x. 
else y = Log1p(kaldi::Exp(x)); // these defined in kaldi-math.h row_data[c] = y; } } } template void MatrixBase::GroupPnorm(const MatrixBase &src, Real power) { KALDI_ASSERT(src.NumCols() % this->NumCols() == 0 && src.NumRows() == this->NumRows()); int group_size = src.NumCols() / this->NumCols(), num_rows = this->NumRows(), num_cols = this->NumCols(); for (MatrixIndexT i = 0; i < num_rows; i++) for (MatrixIndexT j = 0; j < num_cols; j++) (*this)(i, j) = src.Row(i).Range(j * group_size, group_size).Norm(power); } template void MatrixBase::GroupMax(const MatrixBase &src) { KALDI_ASSERT(src.NumCols() % this->NumCols() == 0 && src.NumRows() == this->NumRows()); int group_size = src.NumCols() / this->NumCols(), num_rows = this->NumRows(), num_cols = this->NumCols(); for (MatrixIndexT i = 0; i < num_rows; i++) { const Real *src_row_data = src.RowData(i); for (MatrixIndexT j = 0; j < num_cols; j++) { Real max_val = -1e20; for (MatrixIndexT k = 0; k < group_size; k++) { Real src_data = src_row_data[j * group_size + k]; if (src_data > max_val) max_val = src_data; } (*this)(i, j) = max_val; } } } */ template void MatrixBase::CopyCols(const MatrixBase &src, const MatrixIndexT *indices) { KALDI_ASSERT(NumRows() == src.NumRows()); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, this_stride = stride_, src_stride = src.stride_; Real *this_data = this->data_; const Real *src_data = src.data_; #ifdef KALDI_PARANOID MatrixIndexT src_cols = src.NumCols(); for (MatrixIndexT i = 0; i < num_cols; i++) KALDI_ASSERT(indices[i] >= -1 && indices[i] < src_cols); #endif // For the sake of memory locality we do this row by row, rather // than doing it column-wise using cublas_Xcopy for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride, src_data += src_stride) { const MatrixIndexT *index_ptr = &(indices[0]); for (MatrixIndexT c = 0; c < num_cols; c++, index_ptr++) { if (*index_ptr < 0) this_data[c] = 0; else this_data[c] = src_data[*index_ptr]; } } } /* template void MatrixBase::AddCols(const MatrixBase &src, const MatrixIndexT *indices) { KALDI_ASSERT(NumRows() == src.NumRows()); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, this_stride = stride_, src_stride = src.stride_; Real *this_data = this->data_; const Real *src_data = src.data_; #ifdef KALDI_PARANOID MatrixIndexT src_cols = src.NumCols(); for (MatrixIndexT i = 0; i < num_cols; i++) KALDI_ASSERT(indices[i] >= -1 && indices[i] < src_cols); #endif // For the sake of memory locality we do this row by row, rather // than doing it column-wise using cublas_Xcopy for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride, src_data += src_stride) { const MatrixIndexT *index_ptr = &(indices[0]); for (MatrixIndexT c = 0; c < num_cols; c++, index_ptr++) { if (*index_ptr >= 0) this_data[c] += src_data[*index_ptr]; } } }*/ /* template void MatrixBase::CopyRows(const MatrixBase &src, const MatrixIndexT *indices) { KALDI_ASSERT(NumCols() == src.NumCols()); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, this_stride = stride_; Real *this_data = this->data_; for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) { MatrixIndexT index = indices[r]; if (index < 0) memset(this_data, 0, sizeof(Real) * num_cols_); else cblas_Xcopy(num_cols, src.RowData(index), 1, this_data, 1); } } template void MatrixBase::CopyRows(const Real *const *src) { MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, this_stride = stride_; Real *this_data = this->data_; for (MatrixIndexT r = 0; r < num_rows; r++, this_data += 
this_stride) { const Real *const src_data = src[r]; if (src_data == NULL) memset(this_data, 0, sizeof(Real) * num_cols); else cblas_Xcopy(num_cols, src_data, 1, this_data, 1); } } template void MatrixBase::CopyToRows(Real *const *dst) const { MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, this_stride = stride_; const Real *this_data = this->data_; for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) { Real *const dst_data = dst[r]; if (dst_data != NULL) cblas_Xcopy(num_cols, this_data, 1, dst_data, 1); } } template void MatrixBase::AddRows(Real alpha, const MatrixBase &src, const MatrixIndexT *indexes) { KALDI_ASSERT(NumCols() == src.NumCols()); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, this_stride = stride_; Real *this_data = this->data_; for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) { MatrixIndexT index = indexes[r]; KALDI_ASSERT(index >= -1 && index < src.NumRows()); if (index != -1) cblas_Xaxpy(num_cols, alpha, src.RowData(index), 1, this_data, 1); } } template void MatrixBase::AddRows(Real alpha, const Real *const *src) { MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, this_stride = stride_; Real *this_data = this->data_; for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) { const Real *const src_data = src[r]; if (src_data != NULL) cblas_Xaxpy(num_cols, alpha, src_data, 1, this_data, 1); } } template void MatrixBase::AddToRows(Real alpha, const MatrixIndexT *indexes, MatrixBase *dst) const { KALDI_ASSERT(NumCols() == dst->NumCols()); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, this_stride = stride_; Real *this_data = this->data_; for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) { MatrixIndexT index = indexes[r]; KALDI_ASSERT(index >= -1 && index < dst->NumRows()); if (index != -1) cblas_Xaxpy(num_cols, alpha, this_data, 1, dst->RowData(index), 1); } } template void MatrixBase::AddToRows(Real alpha, Real *const *dst) const { MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, this_stride = stride_; const Real *this_data = this->data_; for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) { Real *const dst_data = dst[r]; if (dst_data != NULL) cblas_Xaxpy(num_cols, alpha, this_data, 1, dst_data, 1); } } template void MatrixBase::Sigmoid(const MatrixBase &src) { KALDI_ASSERT(SameDim(*this, src)); if (num_cols_ == stride_ && src.num_cols_ == src.stride_) { SubVector src_vec(src.data_, num_rows_ * num_cols_), dst_vec(this->data_, num_rows_ * num_cols_); dst_vec.Sigmoid(src_vec); } else { for (MatrixIndexT r = 0; r < num_rows_; r++) { SubVector src_vec(src, r), dest_vec(*this, r); dest_vec.Sigmoid(src_vec); } } } template void MatrixBase::DiffSigmoid(const MatrixBase &value, const MatrixBase &diff) { KALDI_ASSERT(SameDim(*this, value) && SameDim(*this, diff)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, stride = stride_, value_stride = value.stride_, diff_stride = diff.stride_; Real *data = data_; const Real *value_data = value.data_, *diff_data = diff.data_; for (MatrixIndexT r = 0; r < num_rows; r++) { for (MatrixIndexT c = 0; c < num_cols; c++) data[c] = diff_data[c] * value_data[c] * (1.0 - value_data[c]); data += stride; value_data += value_stride; diff_data += diff_stride; } } template void MatrixBase::DiffTanh(const MatrixBase &value, const MatrixBase &diff) { KALDI_ASSERT(SameDim(*this, value) && SameDim(*this, diff)); MatrixIndexT num_rows = num_rows_, num_cols = num_cols_, stride = stride_, value_stride = value.stride_, 
      diff_stride = diff.stride_;
  Real *data = data_;
  const Real *value_data = value.data_, *diff_data = diff.data_;
  for (MatrixIndexT r = 0; r < num_rows; r++) {
    for (MatrixIndexT c = 0; c < num_cols; c++)
      data[c] = diff_data[c] * (1.0 - (value_data[c] * value_data[c]));
    data += stride;
    value_data += value_stride;
    diff_data += diff_stride;
  }
}
*/

/*
template<typename Real>
template<typename OtherReal>
void MatrixBase<Real>::AddVecToRows(const Real alpha,
                                    const VectorBase<OtherReal> &v) {
  const MatrixIndexT num_rows = num_rows_, num_cols = num_cols_,
      stride = stride_;
  KALDI_ASSERT(v.Dim() == num_cols);
  if (num_cols <= 64) {
    Real *data = data_;
    const OtherReal *vdata = v.Data();
    for (MatrixIndexT i = 0; i < num_rows; i++, data += stride) {
      for (MatrixIndexT j = 0; j < num_cols; j++)
        data[j] += alpha * vdata[j];
    }
  } else {
    Vector<OtherReal> ones(num_rows);
    ones.Set(1.0);
    this->AddVecVec(alpha, ones, v);
  }
}

template void MatrixBase<float>::AddVecToRows(const float alpha,
                                              const VectorBase<float> &v);
template void MatrixBase<float>::AddVecToRows(const float alpha,
                                              const VectorBase<double> &v);
template void MatrixBase<double>::AddVecToRows(const double alpha,
                                               const VectorBase<float> &v);
template void MatrixBase<double>::AddVecToRows(const double alpha,
                                               const VectorBase<double> &v);

template<typename Real>
template<typename OtherReal>
void MatrixBase<Real>::AddVecToCols(const Real alpha,
                                    const VectorBase<OtherReal> &v) {
  const MatrixIndexT num_rows = num_rows_, num_cols = num_cols_,
      stride = stride_;
  KALDI_ASSERT(v.Dim() == num_rows);
  if (num_rows <= 64) {
    Real *data = data_;
    const OtherReal *vdata = v.Data();
    for (MatrixIndexT i = 0; i < num_rows; i++, data += stride) {
      Real to_add = alpha * vdata[i];
      for (MatrixIndexT j = 0; j < num_cols; j++)
        data[j] += to_add;
    }
  } else {
    Vector<OtherReal> ones(num_cols);
    ones.Set(1.0);
    this->AddVecVec(alpha, v, ones);
  }
}

template void MatrixBase<float>::AddVecToCols(const float alpha,
                                              const VectorBase<float> &v);
template void MatrixBase<float>::AddVecToCols(const float alpha,
                                              const VectorBase<double> &v);
template void MatrixBase<double>::AddVecToCols(const double alpha,
                                               const VectorBase<float> &v);
template void MatrixBase<double>::AddVecToCols(const double alpha,
                                               const VectorBase<double> &v);
*/

// Explicit instantiation of the classes.  Apparently, it seems to be necessary
// that the instantiation happens at the end of the file.  Otherwise, not all
// the member functions will get instantiated.

template class Matrix<float>;
template class Matrix<double>;
template class MatrixBase<float>;
template class MatrixBase<double>;
template class SubMatrix<float>;
template class SubMatrix<double>;

}  // namespace kaldi
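
// Usage sketch (illustrative only, not part of the library build): a minimal
// example of how the Svd() / SortSvd() routines defined above are typically
// combined, i.e. compute a thin SVD and then reorder the singular values from
// greatest to least absolute value.  It assumes only the declarations in
// matrix/kaldi-matrix.h; the function name SvdSortExample and the 4x3 sizes
// are hypothetical.
/*
void SvdSortExample() {
  using namespace kaldi;
  Matrix<float> m(4, 3);
  m.SetRandn();                     // random test input.
  Vector<float> s(3);               // singular values, dim = min(rows, cols).
  Matrix<float> U(4, 3), Vt(3, 3);  // thin SVD factors: m = U * diag(s) * Vt.
  m.Svd(&s, &U, &Vt);
  SortSvd(&s, &U, &Vt);             // reorder s (and the columns of U, rows of
                                    // Vt) from greatest to least absolute value.
}
*/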
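
// A second illustrative sketch, for the eigenvalue convention documented above
// for Eig() / CreateEigenvalueMatrix(): a complex-conjugate pair lambda +- i*mu
// is represented in D as the 2x2 block [lambda, mu; -mu, lambda], so that
// (*this) * P == P * D.  The rotation-matrix input and the name EigBlockExample
// are hypothetical; only the declarations in matrix/kaldi-matrix.h (plus
// <cmath> for std::cos / std::sin) are assumed.
/*
void EigBlockExample() {
  using namespace kaldi;
  // 2-D rotation by t radians: its eigenvalues are cos(t) +- i*sin(t).
  const double t = 0.5;
  Matrix<double> rot(2, 2);
  rot(0, 0) = std::cos(t);  rot(0, 1) = -std::sin(t);
  rot(1, 0) = std::sin(t);  rot(1, 1) =  std::cos(t);
  Matrix<double> P(2, 2), D(2, 2);
  Vector<double> re(2), im(2);
  rot.Eig(&P, &re, &im);               // re = {cos t, cos t}; im holds +/- sin t
                                       // (the sign order may vary).
  CreateEigenvalueMatrix(re, im, &D);  // D is the 2x2 block
                                       // [lambda, mu; -mu, lambda] for the pair.
}
*/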