[speechx] rm openblas && refactor kaldi-matrix, kaldi-vector (#2824)

* rm openblas && refactor kaldi-matrix kaldi-vector
2 years ago · ee7c266f13
parent c1b1ae0515
commit ee7c266f13
120 changed files with 1281 additions and 20393 deletions
--- a/speechx/CMakeLists.txt
+++ b/speechx/CMakeLists.txt
@ -53,9 +53,6 @@ include(gflags)

 include(glog)

-#openblas
-include(openblas)
-
 # openfst
 include(openfst)
 add_dependencies(openfst gflags glog)
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
@ -14,7 +14,7 @@

 #include "decoder/ctc_prefix_beam_search_decoder.h"
 #include "base/common.h"
-#include "frontend/audio/data_cache.h"
+#include "frontend/data_cache.h"
 #include "fst/symbol-table.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
--- a/speechx/speechx/asr/nnet/decodable.h
+++ b/speechx/speechx/asr/nnet/decodable.h
@ -14,7 +14,7 @@

 #include "base/common.h"
 #include "kaldi/decoder/decodable-itf.h"
-#include "kaldi/matrix/kaldi-matrix.h"
+#include "matrix/kaldi-matrix.h"
 #include "nnet/nnet_itf.h"
 #include "nnet/nnet_producer.h"

--- a/speechx/speechx/asr/nnet/nnet_itf.h
+++ b/speechx/speechx/asr/nnet/nnet_itf.h
@ -15,7 +15,6 @@

 #include "base/basic_types.h"
 #include "kaldi/base/kaldi-types.h"
-#include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/options-itf.h"

 DECLARE_int32(subsampling_rate);
--- a/speechx/speechx/asr/nnet/nnet_producer.cc
+++ b/speechx/speechx/asr/nnet/nnet_producer.cc
@ -13,10 +13,10 @@
 // limitations under the License.

 #include "nnet/nnet_producer.h"
+#include "matrix/kaldi-matrix.h"

 namespace ppspeech {

-using kaldi::Vector;
 using std::vector;
 using kaldi::BaseFloat;

--- a/speechx/speechx/asr/nnet/nnet_producer.h
+++ b/speechx/speechx/asr/nnet/nnet_producer.h
@ -16,7 +16,7 @@

 #include "base/common.h"
 #include "base/safe_queue.h"
-#include "frontend/audio/frontend_itf.h"
+#include "frontend/frontend_itf.h"
 #include "nnet/nnet_itf.h"

 namespace ppspeech {
--- a/speechx/speechx/asr/nnet/u2_nnet.h
+++ b/speechx/speechx/asr/nnet/u2_nnet.h
@ -18,7 +18,7 @@
 #pragma once

 #include "base/common.h"
-#include "kaldi/matrix/kaldi-matrix.h"
+#include "matrix/kaldi-matrix.h"
 #include "nnet/nnet_itf.h"
 #include "paddle/extension.h"
 #include "paddle/jit/all.h"
--- a/speechx/speechx/asr/nnet/u2_nnet_main.cc
+++ b/speechx/speechx/asr/nnet/u2_nnet_main.cc
@ -15,8 +15,8 @@

 #include "base/common.h"
 #include "decoder/param.h"
-#include "frontend/audio/assembler.h"
-#include "frontend/audio/data_cache.h"
+#include "frontend/assembler.h"
+#include "frontend/data_cache.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
 #include "nnet/u2_nnet.h"
--- a/speechx/speechx/asr/recognizer/CMakeLists.txt
+++ b/speechx/speechx/asr/recognizer/CMakeLists.txt
@ -15,7 +15,7 @@ set(TEST_BINS
 foreach(bin_name IN LISTS TEST_BINS)
  add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
  target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-  target_link_libraries(${bin_name} recognizer nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util kaldi-feat-common)
+  target_link_libraries(${bin_name} recognizer nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util)
  target_compile_options(${bin_name}  PRIVATE ${PADDLE_COMPILE_FLAGS})
  target_include_directories(${bin_name}  PRIVATE ${pybind11_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR})
  target_link_libraries(${bin_name}  ${PYTHON_LIBRARIES} ${PADDLE_LINK_FLAGS})
--- a/speechx/speechx/asr/recognizer/u2_recognizer.h
+++ b/speechx/speechx/asr/recognizer/u2_recognizer.h
@ -18,7 +18,7 @@
 #include "decoder/ctc_beam_search_opt.h"
 #include "decoder/ctc_prefix_beam_search_decoder.h"
 #include "decoder/decoder_itf.h"
-#include "frontend/audio/feature_pipeline.h"
+#include "frontend/feature_pipeline.h"
 #include "fst/fstlib.h"
 #include "fst/symbol-table.h"
 #include "nnet/decodable.h"
--- a/speechx/speechx/asr/recognizer/u2_recognizer_main.cc
+++ b/speechx/speechx/asr/recognizer/u2_recognizer_main.cc
@ -13,7 +13,7 @@
 // limitations under the License.

 #include "decoder/param.h"
-#include "kaldi/feat/wave-reader.h"
+#include "frontend/wave-reader.h"
 #include "kaldi/util/table-types.h"
 #include "recognizer/u2_recognizer.h"

--- a/speechx/speechx/asr/recognizer/u2_recognizer_thread_main.cc
+++ b/speechx/speechx/asr/recognizer/u2_recognizer_thread_main.cc
@ -14,7 +14,7 @@

 #include "recognizer/u2_recognizer.h"
 #include "decoder/param.h"
-#include "kaldi/feat/wave-reader.h"
+#include "frontend/wave-reader.h"
 #include "kaldi/util/table-types.h"

 DEFINE_string(wav_rspecifier, "", "test feature rspecifier");
--- a/speechx/speechx/common/CMakeLists.txt
+++ b/speechx/speechx/common/CMakeLists.txt
@ -4,6 +4,8 @@ ${CMAKE_CURRENT_SOURCE_DIR}/../
 )
 add_subdirectory(utils)

+add_subdirectory(matrix)
+
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}/frontend
 )
--- a/speechx/speechx/common/frontend/CMakeLists.txt
+++ b/speechx/speechx/common/frontend/CMakeLists.txt
@ -1,2 +1,28 @@
+# Library built from the feature-extraction sources listed below; several of
+# them are marked in their headers as copied/modified from kaldi/src/feat.
+# No library type is given here, so BUILD_SHARED_LIBS decides whether this
+# is built static or shared.
+add_library(kaldi-native-fbank-core 
+  feature-fbank.cc
+  feature-functions.cc
+  feature-window.cc
+  fftsg.c
+  mel-computations.cc
+  rfft.cc
+)

-add_subdirectory(audio)
+# Static front-end library: CMVN, audio/feature caches, feature pipeline,
+# assembler and the wave reader that now lives in this directory.
+add_library(frontend STATIC
+  cmvn.cc
+  audio_cache.cc
+  feature_cache.cc
+  feature_pipeline.cc
+  assembler.cc
+  wave-reader.cc
+)
+# kaldi-base and kaldi-matrix are declared explicitly: wave-reader.cc includes
+# base/kaldi-error.h and frontend_itf.h includes matrix/kaldi-vector.h, so
+# consumers should not have to list them (or get the static link order right)
+# on the library's behalf.
+target_link_libraries(frontend
+  PUBLIC
+    kaldi-native-fbank-core
+    utils
+    kaldi-matrix
+    kaldi-base
+)
+
+# Command-line tools of this directory; each entry is built from <name>.cc.
+set(BINS
+  compute_fbank_main
+)
+
+foreach(bin_name IN LISTS BINS)
+  add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
+  target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
+  # An executable has no consumers, so its link dependencies are PRIVATE.
+  target_link_libraries(${bin_name} PRIVATE frontend utils kaldi-util gflags glog)
+endforeach()
--- a/speechx/speechx/common/frontend/audio/assembler.cc
+++ b/speechx/speechx/common/frontend/audio/assembler.cc
@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "frontend/audio/assembler.h"
+#include "frontend/assembler.h"

 namespace ppspeech {

--- a/speechx/speechx/common/frontend/audio/assembler.h
+++ b/speechx/speechx/common/frontend/audio/assembler.h
@ -15,7 +15,7 @@
 #pragma once

 #include "base/common.h"
-#include "frontend/audio/frontend_itf.h"
+#include "frontend/frontend_itf.h"

 namespace ppspeech {

--- a/speechx/speechx/common/frontend/audio/CMakeLists.txt
+++ b/speechx/speechx/common/frontend/audio/CMakeLists.txt
@ -1,27 +0,0 @@
-add_library(kaldi-native-fbank-core 
-  feature-fbank.cc
-  feature-functions.cc
-  feature-window.cc
-  fftsg.c
-  mel-computations.cc
-  rfft.cc
-)
-
-add_library(frontend STATIC
-  cmvn.cc
-  audio_cache.cc
-  feature_cache.cc
-  feature_pipeline.cc
-  assembler.cc
-)
-target_link_libraries(frontend PUBLIC kaldi-native-fbank-core utils)
-
-set(BINS 
-  compute_fbank_main
-)
-
-foreach(bin_name IN LISTS BINS)
-  add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
-  target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-  target_link_libraries(${bin_name} PUBLIC frontend utils kaldi-util gflags glog kaldi-feat-common)
-endforeach()
--- a/speechx/speechx/common/frontend/audio/audio_cache.cc
+++ b/speechx/speechx/common/frontend/audio/audio_cache.cc
@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "frontend/audio/audio_cache.h"
+#include "frontend/audio_cache.h"

 #include "kaldi/base/timer.h"

--- a/speechx/speechx/common/frontend/audio/audio_cache.h
+++ b/speechx/speechx/common/frontend/audio/audio_cache.h
@ -16,7 +16,7 @@
 #pragma once

 #include "base/common.h"
-#include "frontend/audio/frontend_itf.h"
+#include "frontend/frontend_itf.h"

 namespace ppspeech {

--- a/speechx/speechx/common/frontend/audio/cmvn.cc
+++ b/speechx/speechx/common/frontend/audio/cmvn.cc
@ -13,7 +13,7 @@
 // limitations under the License.


-#include "frontend/audio/cmvn.h"
+#include "frontend/cmvn.h"

 #include "utils/file_utils.h"
 #include "utils/picojson.h"
--- a/speechx/speechx/common/frontend/audio/cmvn.h
+++ b/speechx/speechx/common/frontend/audio/cmvn.h
@ -15,8 +15,7 @@
 #pragma once

 #include "base/common.h"
-#include "frontend/audio/frontend_itf.h"
-#include "kaldi/matrix/kaldi-matrix.h"
+#include "frontend/frontend_itf.h"
 #include "kaldi/util/options-itf.h"

 namespace ppspeech {
--- a/speechx/speechx/common/frontend/audio/compute_fbank_main.cc
+++ b/speechx/speechx/common/frontend/audio/compute_fbank_main.cc
@ -16,13 +16,13 @@

 #include "base/flags.h"
 #include "base/log.h"
-#include "frontend/audio/audio_cache.h"
-#include "frontend/audio/data_cache.h"
-#include "frontend/audio/fbank.h"
-#include "frontend/audio/feature_cache.h"
-#include "frontend/audio/frontend_itf.h"
-#include "frontend/audio/normalizer.h"
-#include "kaldi/feat/wave-reader.h"
+#include "frontend/audio_cache.h"
+#include "frontend/data_cache.h"
+#include "frontend/fbank.h"
+#include "frontend/feature_cache.h"
+#include "frontend/frontend_itf.h"
+#include "frontend/normalizer.h"
+#include "frontend/wave-reader.h"
 #include "kaldi/util/kaldi-io.h"
 #include "kaldi/util/table-types.h"

--- a/speechx/speechx/common/frontend/audio/compute_linear_spectrogram_main.cc
+++ b/speechx/speechx/common/frontend/audio/compute_linear_spectrogram_main.cc
--- a/speechx/speechx/common/frontend/audio/data_cache.h
+++ b/speechx/speechx/common/frontend/audio/data_cache.h
@ -16,7 +16,7 @@
 #pragma once

 #include "base/common.h"
-#include "frontend/audio/frontend_itf.h"
+#include "frontend/frontend_itf.h"

 using std::vector;

--- a/speechx/speechx/common/frontend/audio/db_norm.cc
+++ b/speechx/speechx/common/frontend/audio/db_norm.cc
--- a/speechx/speechx/common/frontend/audio/db_norm.h
+++ b/speechx/speechx/common/frontend/audio/db_norm.h
--- a/speechx/speechx/common/frontend/audio/fbank.cc
+++ b/speechx/speechx/common/frontend/audio/fbank.cc
--- a/speechx/speechx/common/frontend/audio/fbank.h
+++ b/speechx/speechx/common/frontend/audio/fbank.h
@ -15,8 +15,8 @@
 #pragma once

 #include "base/common.h"
-#include "frontend/audio/feature_common.h"
-#include "frontend/audio/feature-fbank.h"
+#include "frontend/feature_common.h"
+#include "frontend/feature-fbank.h"

 namespace ppspeech {

--- a/speechx/speechx/common/frontend/audio/feature-fbank.cc
+++ b/speechx/speechx/common/frontend/audio/feature-fbank.cc
@ -18,11 +18,11 @@

 // This file is copied/modified from kaldi/src/feat/feature-fbank.cc
 //
-#include "frontend/audio/feature-fbank.h"
+#include "frontend/feature-fbank.h"

 #include <cmath>

-#include "frontend/audio/feature-functions.h"
+#include "frontend/feature-functions.h"

 namespace knf {

--- a/speechx/speechx/common/frontend/audio/feature-fbank.h
+++ b/speechx/speechx/common/frontend/audio/feature-fbank.h
@ -23,9 +23,9 @@

 #include <map>

-#include "frontend/audio/feature-window.h"
-#include "frontend/audio/mel-computations.h"
-#include "frontend/audio/rfft.h"
+#include "frontend/feature-window.h"
+#include "frontend/mel-computations.h"
+#include "frontend/rfft.h"

 namespace knf {

--- a/speechx/speechx/common/frontend/audio/feature-functions.cc
+++ b/speechx/speechx/common/frontend/audio/feature-functions.cc
@ -18,7 +18,7 @@

 // This file is copied/modified from kaldi/src/feat/feature-functions.cc

-#include "frontend/audio/feature-functions.h"
+#include "frontend/feature-functions.h"

 #include <cstdint>
 #include <vector>
--- a/speechx/speechx/common/frontend/audio/feature-functions.h
+++ b/speechx/speechx/common/frontend/audio/feature-functions.h
--- a/speechx/speechx/common/frontend/audio/feature-window.cc
+++ b/speechx/speechx/common/frontend/audio/feature-window.cc
@ -4,7 +4,7 @@

 // This file is copied/modified from kaldi/src/feat/feature-window.cc

-#include "frontend/audio/feature-window.h"
+#include "frontend/feature-window.h"

 #include <cmath>
 #include <vector>
--- a/speechx/speechx/common/frontend/audio/feature-window.h
+++ b/speechx/speechx/common/frontend/audio/feature-window.h
--- a/speechx/speechx/common/frontend/audio/feature_cache.cc
+++ b/speechx/speechx/common/frontend/audio/feature_cache.cc
@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "frontend/audio/feature_cache.h"
+#include "frontend/feature_cache.h"

 namespace ppspeech {

--- a/speechx/speechx/common/frontend/audio/feature_cache.h
+++ b/speechx/speechx/common/frontend/audio/feature_cache.h
@ -15,7 +15,7 @@
 #pragma once

 #include "base/common.h"
-#include "frontend/audio/frontend_itf.h"
+#include "frontend/frontend_itf.h"

 namespace ppspeech {

--- a/speechx/speechx/common/frontend/audio/feature_common.h
+++ b/speechx/speechx/common/frontend/audio/feature_common.h
@ -15,7 +15,7 @@
 #pragma once

 #include "frontend_itf.h"
-#include "frontend/audio/feature-window.h"
+#include "frontend/feature-window.h"

 namespace ppspeech {

@ -52,4 +52,4 @@ class StreamingFeatureTpl : public FrontendInterface {

 }  // namespace ppspeech

-#include "frontend/audio/feature_common_inl.h"
+#include "frontend/feature_common_inl.h"
--- a/speechx/speechx/common/frontend/audio/feature_common_inl.h
+++ b/speechx/speechx/common/frontend/audio/feature_common_inl.h
--- a/speechx/speechx/common/frontend/audio/feature_pipeline.cc
+++ b/speechx/speechx/common/frontend/audio/feature_pipeline.cc
@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "frontend/audio/feature_pipeline.h"
+#include "frontend/feature_pipeline.h"

 namespace ppspeech {

--- a/speechx/speechx/common/frontend/audio/feature_pipeline.h
+++ b/speechx/speechx/common/frontend/audio/feature_pipeline.h
@ -16,13 +16,13 @@

 #pragma once

-#include "frontend/audio/assembler.h"
-#include "frontend/audio/audio_cache.h"
-#include "frontend/audio/data_cache.h"
-#include "frontend/audio/fbank.h"
-#include "frontend/audio/feature_cache.h"
-#include "frontend/audio/frontend_itf.h"
-#include "frontend/audio/normalizer.h"
+#include "frontend/assembler.h"
+#include "frontend/audio_cache.h"
+#include "frontend/data_cache.h"
+#include "frontend/fbank.h"
+#include "frontend/feature_cache.h"
+#include "frontend/frontend_itf.h"
+#include "frontend/cmvn.h"

 // feature
 DECLARE_bool(fill_zero);
--- a/speechx/speechx/common/frontend/audio/fftsg.c
+++ b/speechx/speechx/common/frontend/audio/fftsg.c
--- a/speechx/speechx/common/frontend/audio/frontend_itf.h
+++ b/speechx/speechx/common/frontend/audio/frontend_itf.h
@ -15,7 +15,7 @@
 #pragma once

 #include "base/basic_types.h"
-#include "kaldi/matrix/kaldi-vector.h"
+#include "matrix/kaldi-vector.h"

 namespace ppspeech {

--- a/speechx/speechx/common/frontend/audio/linear_spectrogram.cc
+++ b/speechx/speechx/common/frontend/audio/linear_spectrogram.cc
--- a/speechx/speechx/common/frontend/audio/linear_spectrogram.h
+++ b/speechx/speechx/common/frontend/audio/linear_spectrogram.h
--- a/speechx/speechx/common/frontend/audio/mel-computations.cc
+++ b/speechx/speechx/common/frontend/audio/mel-computations.cc
@ -18,12 +18,12 @@

 // This file is copied/modified from kaldi/src/feat/mel-computations.cc

-#include "frontend/audio/mel-computations.h"
+#include "frontend/mel-computations.h"

 #include <algorithm>
 #include <sstream>

-#include "frontend/audio/feature-window.h"
+#include "frontend/feature-window.h"

 namespace knf {

--- a/speechx/speechx/common/frontend/audio/mel-computations.h
+++ b/speechx/speechx/common/frontend/audio/mel-computations.h
@ -22,7 +22,7 @@
 #include <cmath>
 #include <string>

-#include "frontend/audio/feature-window.h"
+#include "frontend/feature-window.h"

 namespace knf {

--- a/speechx/speechx/common/frontend/audio/mfcc.cc
+++ b/speechx/speechx/common/frontend/audio/mfcc.cc
--- a/speechx/speechx/common/frontend/audio/mfcc.h
+++ b/speechx/speechx/common/frontend/audio/mfcc.h
--- a/speechx/speechx/common/frontend/audio/normalizer.h
+++ b/speechx/speechx/common/frontend/audio/normalizer.h
@ -14,5 +14,4 @@

 #pragma once

-#include "frontend/audio/cmvn.h"
-#include "frontend/audio/db_norm.h"
+#include "frontend/cmvn.h"
--- a/speechx/speechx/common/frontend/audio/rfft.cc
+++ b/speechx/speechx/common/frontend/audio/rfft.cc
@ -16,7 +16,7 @@
 * limitations under the License.
 */

-#include "frontend/audio/rfft.h"
+#include "frontend/rfft.h"

 #include <cmath>
 #include <vector>
--- a/speechx/speechx/common/frontend/audio/rfft.h
+++ b/speechx/speechx/common/frontend/audio/rfft.h
--- a/speechx/speechx/common/frontend/wave-reader.cc
+++ b/speechx/speechx/common/frontend/wave-reader.cc
@ -25,7 +25,7 @@
 #include <sstream>
 #include <vector>

-#include "feat/wave-reader.h"
+#include "frontend/wave-reader.h"
 #include "base/kaldi-error.h"
 #include "base/kaldi-utils.h"

--- a/speechx/speechx/common/frontend/wave-reader.h
+++ b/speechx/speechx/common/frontend/wave-reader.h
--- a/speechx/speechx/common/matrix/CMakeLists.txt
+++ b/speechx/speechx/common/matrix/CMakeLists.txt
@ -0,0 +1,7 @@
+
+# Kaldi matrix/vector library, built from the two sources in this directory.
+add_library(kaldi-matrix
+  kaldi-matrix.cc
+  kaldi-vector.cc
+)
+
+# Explicit PUBLIC keeps the transitive propagation that the keyword-less
+# signature provided, while avoiding its legacy semantics.
+target_link_libraries(kaldi-matrix PUBLIC kaldi-base)
--- a/speechx/speechx/common/matrix/kaldi-matrix-inl.h
+++ b/speechx/speechx/common/matrix/kaldi-matrix-inl.h
@ -28,7 +28,7 @@ namespace kaldi {
 template<typename Real>
 Matrix<Real>::Matrix(): MatrixBase<Real>(NULL, 0, 0, 0) { }

-
+/*
 template<>
 template<>
 void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra, const VectorBase<float> &rb);
@ -36,6 +36,7 @@ void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra
 template<>
 template<>
 void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double> &ra, const VectorBase<double> &rb);
+*/

 template<typename Real>
 inline std::ostream & operator << (std::ostream & os, const MatrixBase<Real> & M) {
--- a/speechx/speechx/common/matrix/kaldi-matrix.cc
+++ b/speechx/speechx/common/matrix/kaldi-matrix.cc
@ -23,17 +23,9 @@
 // limitations under the License.

 #include "matrix/kaldi-matrix.h"
-#include "matrix/sp-matrix.h"
-#include "matrix/jama-svd.h"
-#include "matrix/jama-eig.h"
-#include "matrix/compressed-matrix.h"
-#include "matrix/sparse-matrix.h"
-
-static_assert(int(kaldi::kNoTrans) == int(CblasNoTrans) && int(kaldi::kTrans) == int(CblasTrans), 
-    "kaldi::kNoTrans and kaldi::kTrans must be equal to the appropriate CBLAS library constants!");

 namespace kaldi {
-
+/*
 template<typename Real>
 void MatrixBase<Real>::Invert(Real *log_det, Real *det_sign,
                              bool inverse_needed) {
@ -206,29 +198,30 @@ void MatrixBase<Real>::SetMatMatDivMat(const MatrixBase<Real>& A,
    }
  }
 }
+*/

-
-template<typename Real>
-void MatrixBase<Real>::CopyLowerToUpper() {
-  KALDI_ASSERT(num_rows_ == num_cols_);
-  Real *data = data_;
-  MatrixIndexT num_rows = num_rows_, stride = stride_;
-  for (int32 i = 0; i < num_rows; i++)
-    for (int32 j = 0; j < i; j++)
-      data[j * stride + i ] = data[i * stride + j];
-}
+//template<typename Real>
+//void MatrixBase<Real>::CopyLowerToUpper() {
+  //KALDI_ASSERT(num_rows_ == num_cols_);
+  //Real *data = data_;
+  //MatrixIndexT num_rows = num_rows_, stride = stride_;
+  //for (int32 i = 0; i < num_rows; i++)
+    //for (int32 j = 0; j < i; j++)
+      //data[j * stride + i ] = data[i * stride + j];
+//}


-template<typename Real>
-void MatrixBase<Real>::CopyUpperToLower() {
-  KALDI_ASSERT(num_rows_ == num_cols_);
-  Real *data = data_;
-  MatrixIndexT num_rows = num_rows_, stride = stride_;
-  for (int32 i = 0; i < num_rows; i++)
-    for (int32 j = 0; j < i; j++)
-      data[i * stride + j] = data[j * stride + i];
-}
+//template<typename Real>
+//void MatrixBase<Real>::CopyUpperToLower() {
+  //KALDI_ASSERT(num_rows_ == num_cols_);
+  //Real *data = data_;
+  //MatrixIndexT num_rows = num_rows_, stride = stride_;
+  //for (int32 i = 0; i < num_rows; i++)
+    //for (int32 j = 0; j < i; j++)
+      //data[i * stride + j] = data[j * stride + i];
+//}

+/*
 template<typename Real>
 void MatrixBase<Real>::SymAddMat2(const Real alpha,
                                  const MatrixBase<Real> &A,
@ -734,7 +727,7 @@ void MatrixBase<Real>::LapackGesvd(VectorBase<Real> *s, MatrixBase<Real> *U_in,
 }

 #endif
-
+*/
 // Copy constructor.  Copies data to newly allocated memory.
 template<typename Real>
 Matrix<Real>::Matrix (const MatrixBase<Real> & M,
@ -898,6 +891,7 @@ template
 void MatrixBase<double>::CopyFromMat(const MatrixBase<double> & M,
                                     MatrixTransposeType Trans);

+/*
 // Specialize the template for CopyFromSp for float, float.
 template<>
 template<>
@ -992,7 +986,7 @@ template
 void MatrixBase<double>::CopyFromTp(const TpMatrix<double> & M,
                                    MatrixTransposeType trans);

-
+*/
 template<typename Real>
 void MatrixBase<Real>::CopyRowsFromVec(const VectorBase<Real> &rv) {
  if (rv.Dim() == num_rows_*num_cols_) {
@ -1076,7 +1070,6 @@ void MatrixBase<Real>::CopyColsFromVec(const VectorBase<Real> &rv) {
  }
 }

-
 template<typename Real>
 void MatrixBase<Real>::CopyRowFromVec(const VectorBase<Real> &rv, const MatrixIndexT row) {
  KALDI_ASSERT(rv.Dim() == num_cols_ &&
@ -1088,7 +1081,7 @@ void MatrixBase<Real>::CopyRowFromVec(const VectorBase<Real> &rv, const MatrixIn

  std::memcpy(row_data, rv_data, num_cols_ * sizeof(Real));
 }
-
+/*
 template<typename Real>
 void MatrixBase<Real>::CopyDiagFromVec(const VectorBase<Real> &rv) {
  KALDI_ASSERT(rv.Dim() == std::min(num_cols_, num_rows_));
@ -1096,7 +1089,7 @@ void MatrixBase<Real>::CopyDiagFromVec(const VectorBase<Real> &rv) {
  Real *my_data = this->Data();
  for (; rv_data != rv_end; rv_data++, my_data += (this->stride_+1))
    *my_data = *rv_data;
-}
+}*/

 template<typename Real>
 void MatrixBase<Real>::CopyColFromVec(const VectorBase<Real> &rv,
@ -1135,7 +1128,7 @@ void Matrix<Real>::Destroy() {
 }


-
+/*
 template<typename Real>
 void MatrixBase<Real>::MulElements(const MatrixBase<Real> &a) {
  KALDI_ASSERT(a.NumRows() == num_rows_ && a.NumCols() == num_cols_);
@ -1325,6 +1318,7 @@ void MatrixBase<Real>::MulColsVec(const VectorBase<Real> &scale) {
    }
  }
 }
+*/

 template<typename Real>
 void MatrixBase<Real>::SetZero() {
@ -1344,6 +1338,7 @@ void MatrixBase<Real>::Set(Real value) {
  }
 }

+/*
 template<typename Real>
 void MatrixBase<Real>::SetUnit() {
  SetZero();
@ -1374,6 +1369,7 @@ void MatrixBase<Real>::SetRandUniform() {
    }
  }
 }
+*/

 template<typename Real>
 void MatrixBase<Real>::Write(std::ostream &os, bool binary) const {
@ -1420,23 +1416,11 @@ void MatrixBase<Real>::Write(std::ostream &os, bool binary) const {


 template<typename Real>
-void MatrixBase<Real>::Read(std::istream & is, bool binary, bool add) {
-  if (add) {
-    Matrix<Real> tmp(num_rows_, num_cols_);
-    tmp.Read(is, binary, false);  // read without adding.
-    if (tmp.num_rows_ != this->num_rows_ || tmp.num_cols_ != this->num_cols_)
-      KALDI_ERR << "MatrixBase::Read, size mismatch "
-                << this->num_rows_ << ", " << this->num_cols_
-                << " vs. " << tmp.num_rows_ << ", " << tmp.num_cols_;
-    this->AddMat(1.0, tmp);
-    return;
-  }
-  // now assume add == false.
-
+void MatrixBase<Real>::Read(std::istream & is, bool binary) {
  //  In order to avoid rewriting this, we just declare a Matrix and
  // use it to read the data, then copy.
  Matrix<Real> tmp;
-  tmp.Read(is, binary, false);
+  tmp.Read(is, binary);
  if (tmp.NumRows() != NumRows() || tmp.NumCols() != NumCols()) {
    KALDI_ERR << "MatrixBase<Real>::Read, size mismatch "
              << NumRows() << " x " << NumCols() << " versus "
@ -1447,23 +1431,7 @@ void MatrixBase<Real>::Read(std::istream & is, bool binary, bool add) {


 template<typename Real>
-void Matrix<Real>::Read(std::istream & is, bool binary, bool add) {
-  if (add) {
-    Matrix<Real> tmp;
-    tmp.Read(is, binary, false);  // read without adding.
-    if (this->num_rows_ == 0) this->Resize(tmp.num_rows_, tmp.num_cols_);
-    else {
-      if (this->num_rows_ != tmp.num_rows_ || this->num_cols_ != tmp.num_cols_) {
-        if (tmp.num_rows_ == 0) return;  // do nothing in this case.
-        else KALDI_ERR << "Matrix::Read, size mismatch "
-                       << this->num_rows_ <<  ", " << this->num_cols_
-                       << " vs. " << tmp.num_rows_ << ", " << tmp.num_cols_;
-      }
-    }
-    this->AddMat(1.0, tmp);
-    return;
-  }
-
+void Matrix<Real>::Read(std::istream & is, bool binary) {
  // Read directly from the stream; this overload has no "add" (accumulate) mode.
  MatrixIndexT pos_at_start = is.tellg();
  std::ostringstream specific_error;
@ -1472,10 +1440,10 @@ void Matrix<Real>::Read(std::istream & is, bool binary, bool add) {
    int peekval = Peek(is, binary);
    if (peekval == 'C') {
      // NOTE(review): CompressedMatrix input ('C') is no longer decoded here; this branch now returns without reading or resizing.
-      CompressedMatrix compressed_mat;
-      compressed_mat.Read(is, binary); // at this point, add == false.
-      this->Resize(compressed_mat.NumRows(), compressed_mat.NumCols());
-      compressed_mat.CopyToMat(this);
+      //CompressedMatrix compressed_mat;
+      //compressed_mat.Read(is, binary); // at this point, add == false.
+      //this->Resize(compressed_mat.NumRows(), compressed_mat.NumCols());
+      //compressed_mat.CopyToMat(this);
      return;
    }
    const char *my_token =  (sizeof(Real) == 4 ? "FM" : "DM");
@ -1483,7 +1451,7 @@ void Matrix<Real>::Read(std::istream & is, bool binary, bool add) {
    if (peekval == other_token_start) {  // need to instantiate the other type to read it.
      typedef typename OtherReal<Real>::Real OtherType;  // if Real == float, OtherType == double, and vice versa.
      Matrix<OtherType> other(this->num_rows_, this->num_cols_);
-      other.Read(is, binary, false);  // add is false at this point anyway.
+      other.Read(is, binary);  // add is false at this point anyway.
      this->Resize(other.NumRows(), other.NumCols());
      this->CopyFromMat(other);
      return;
@ -1672,7 +1640,7 @@ SubMatrix<Real>::SubMatrix(Real *data,
  }
 }

-
+/*
 template<typename Real>
 void MatrixBase<Real>::Add(const Real alpha) {
  Real *data = data_;
@ -1812,15 +1780,15 @@ void MatrixBase<Real>::DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
      for(int32 i = 0; i < NumRows(); i++)
      (*this)(i, i)  *= 1.00001;
      }*/
-  bool ans = JamaSvd(s, U, Vt);
-  if (Vt != NULL) Vt->Transpose();  // possibly to do: change this and also the transpose inside the JamaSvd routine.  note, Vt is square.
-  if (!ans) {
-    KALDI_ERR << "Error doing Svd";  // This one will be caught.
-  }
-#endif
-  if (prescale != 1.0) s->Scale(1.0/prescale);
-}
-
+//  bool ans = JamaSvd(s, U, Vt);
+  //if (Vt != NULL) Vt->Transpose();  // possibly to do: change this and also the transpose inside the JamaSvd routine.  note, Vt is square.
+  //if (!ans) {
+    //KALDI_ERR << "Error doing Svd";  // This one will be caught.
+  //}
+//#endif
+  //if (prescale != 1.0) s->Scale(1.0/prescale);
+//}
+/*
 template<typename Real>
 void MatrixBase<Real>::Svd(VectorBase<Real> *s, MatrixBase<Real> *U, MatrixBase<Real> *Vt) const {
  try {
@ -2052,17 +2020,18 @@ void MatrixBase<Real>::InvertDouble(Real *log_det, Real *det_sign,
  if (log_det) *log_det = log_det_tmp;
  if (det_sign) *det_sign = det_sign_tmp;
 }
+*/

-template<class Real>
-void MatrixBase<Real>::CopyFromMat(const CompressedMatrix &mat) {
-  mat.CopyToMat(this);
-}
+//template<class Real>
+//void MatrixBase<Real>::CopyFromMat(const CompressedMatrix &mat) {
+  //mat.CopyToMat(this);
+//}

-template<class Real>
-Matrix<Real>::Matrix(const CompressedMatrix &M): MatrixBase<Real>() {
-  Resize(M.NumRows(), M.NumCols(), kUndefined);
-  M.CopyToMat(this);
-}
+//template<class Real>
+//Matrix<Real>::Matrix(const CompressedMatrix &M): MatrixBase<Real>() {
+  //Resize(M.NumRows(), M.NumCols(), kUndefined);
+  //M.CopyToMat(this);
+//}



@ -2074,7 +2043,7 @@ void MatrixBase<Real>::InvertElements() {
    }
  }
 }
-
+/*
 template<typename Real>
 void MatrixBase<Real>::Transpose() {
  KALDI_ASSERT(num_rows_ == num_cols_);
@ -2250,7 +2219,7 @@ bool MatrixBase<Real>::Power(Real power) {
  (*this).AddMatMat(1.0, tmp, kNoTrans, P, kNoTrans, 0.0);
  return true;
 }
-
+*/
 template<typename Real>
 void Matrix<Real>::Swap(Matrix<Real> *other) {
  std::swap(this->data_, other->data_);
@ -2258,7 +2227,7 @@ void Matrix<Real>::Swap(Matrix<Real> *other) {
  std::swap(this->num_rows_, other->num_rows_);
  std::swap(this->stride_, other->stride_);
 }
-
+/*
 // Repeating this comment that appeared in the header:
 // Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D
 // P^{-1}.  Be careful: the relationship of D to the eigenvalues we output is
@ -2298,7 +2267,7 @@ void MatrixBase<Real>::Eig(MatrixBase<Real> *P,
 // INT_32 mVersion;
 // INT_32 mSampSize;
 // };
-
+/*
 template<typename Real>
 bool ReadHtk(std::istream &is, Matrix<Real> *M_ptr, HtkHeader *header_ptr)
 {
@ -2821,7 +2790,7 @@ void MatrixBase<Real>::GroupMax(const MatrixBase<Real> &src) {
    }
  }
 }
-
+*/
 template<typename Real>
 void MatrixBase<Real>::CopyCols(const MatrixBase<Real> &src,
                                const MatrixIndexT *indices) {
@ -2847,7 +2816,7 @@ void MatrixBase<Real>::CopyCols(const MatrixBase<Real> &src,
  }
 }

-
+/*
 template<typename Real>
 void MatrixBase<Real>::AddCols(const MatrixBase<Real> &src,
                               const MatrixIndexT *indices) {
@ -2871,8 +2840,9 @@ void MatrixBase<Real>::AddCols(const MatrixBase<Real> &src,
        this_data[c] += src_data[*index_ptr];
    }
  }
-}
+}*/

+/*
 template<typename Real>
 void MatrixBase<Real>::CopyRows(const MatrixBase<Real> &src,
                                const MatrixIndexT *indices) {
@ -3022,9 +2992,9 @@ void MatrixBase<Real>::DiffTanh(const MatrixBase<Real> &value,
    value_data += value_stride;
    diff_data += diff_stride;
  }
-}
-
+}*/

+/*
 template<typename Real>
 template<typename OtherReal>
 void MatrixBase<Real>::AddVecToRows(const Real alpha, const VectorBase<OtherReal> &v) {
@ -3087,7 +3057,7 @@ template void MatrixBase<double>::AddVecToCols(const double alpha,
                                               const VectorBase<float> &v);
 template void MatrixBase<double>::AddVecToCols(const double alpha,
                                               const VectorBase<double> &v);
-
+*/
 //Explicit instantiation of the classes
 //Apparently, it seems to be necessary that the instantiation
 //happens at the end of the file. Otherwise, not all the member
--- a/speechx/speechx/common/matrix/kaldi-matrix.h
+++ b/speechx/speechx/common/matrix/kaldi-matrix.h
@ -32,13 +32,6 @@ namespace kaldi {

 /// @{ \addtogroup matrix_funcs_scalar

-/// We need to declare this here as it will be a friend function.
-/// tr(A B), or tr(A B^T).
-template<typename Real>
-Real TraceMatMat(const MatrixBase<Real> &A, const MatrixBase<Real> &B,
-                 MatrixTransposeType trans = kNoTrans);
-/// @}
-
 /// \addtogroup matrix_group
 /// @{

@ -50,15 +43,8 @@ class MatrixBase {
 public:
  // so this child can access protected members of other instances.
  friend class Matrix<Real>;
+  friend class SubMatrix<Real>;
  // friend declarations for CUDA matrices (see ../cudamatrix/)
-  friend class CuMatrixBase<Real>;
-  friend class CuMatrix<Real>;
-  friend class CuSubMatrix<Real>;
-  friend class CuPackedMatrix<Real>;
-  friend class PackedMatrix<Real>;
-  friend class SparseMatrix<Real>;
-  friend class SparseMatrix<float>;
-  friend class SparseMatrix<double>;

  /// Returns number of rows (or zero for empty matrix).
  inline MatrixIndexT  NumRows() const { return num_rows_; }
@ -127,14 +113,6 @@ class MatrixBase {
  /// Sets all elements to a specific value.
  void Set(Real);
  /// Sets to zero, except ones along diagonal [for non-square matrices too]
-  void SetUnit();
-  /// Sets to random values of a normal distribution
-  void SetRandn();
-  /// Sets to numbers uniformly distributed on (0, 1)
-  void SetRandUniform();
-
-  /*  Copying functions.  These do not resize the matrix! */
-

  /// Copy given matrix. (no resize is done).
  template<typename OtherReal>
@ -142,21 +120,17 @@ class MatrixBase {
                   MatrixTransposeType trans = kNoTrans);

  /// Copy from compressed matrix.
-  void CopyFromMat(const CompressedMatrix &M);
-
-  /// Copy given spmatrix. (no resize is done).
-  template<typename OtherReal>
-  void CopyFromSp(const SpMatrix<OtherReal> &M);
+  //void CopyFromMat(const CompressedMatrix &M);

  /// Copy given tpmatrix. (no resize is done).
-  template<typename OtherReal>
-  void CopyFromTp(const TpMatrix<OtherReal> &M,
-                  MatrixTransposeType trans = kNoTrans);
+  //template<typename OtherReal>
+  //void CopyFromTp(const TpMatrix<OtherReal> &M,
+                  //MatrixTransposeType trans = kNoTrans);

  /// Copy from CUDA matrix.  Implemented in ../cudamatrix/cu-matrix.h
-  template<typename OtherReal>
-  void CopyFromMat(const CuMatrixBase<OtherReal> &M,
-                   MatrixTransposeType trans = kNoTrans);
+  //template<typename OtherReal>
+  //void CopyFromMat(const CuMatrixBase<OtherReal> &M,
+                   //MatrixTransposeType trans = kNoTrans);

  /// This function has two modes of operation.  If v.Dim() == NumRows() *
  /// NumCols(), then treats the vector as a row-by-row concatenation of a
@ -165,7 +139,7 @@ class MatrixBase {
  void CopyRowsFromVec(const VectorBase<Real> &v);

  /// This version of CopyRowsFromVec is implemented in ../cudamatrix/cu-vector.cc
-  void CopyRowsFromVec(const CuVectorBase<Real> &v);
+  //void CopyRowsFromVec(const CuVectorBase<Real> &v);

  template<typename OtherReal>
  void CopyRowsFromVec(const VectorBase<OtherReal> &v);
@ -215,7 +189,7 @@ class MatrixBase {
    return SubMatrix<Real>(*this, 0, num_rows_, col_offset, num_cols);
  }

-  /* Various special functions. */
+/*
  /// Returns sum of all elements in matrix.
  Real Sum() const;
  /// Returns trace of matrix.
@ -268,15 +242,16 @@ class MatrixBase {
  /// Does inversion in double precision even if matrix was not double.
  void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL,
                      bool inverse_needed = true);
-
+*/
  /// Inverts all the elements of the matrix
  void InvertElements();
-
+/*
  /// Transpose the matrix.  This one is only
  /// applicable to square matrices (the one in the
  /// Matrix child class works also for non-square.
  void Transpose();

+*/
  /// Copies column r from column indices[r] of src.
  /// As a special case, if indexes[i] == -1, sets column i to zero.
  /// all elements of "indices" must be in [-1, src.NumCols()-1],
@ -296,8 +271,8 @@ class MatrixBase {
  /// indices.size() must equal this->NumCols(),
  /// all elements of "reorder" must be in [-1, src.NumCols()-1],
  /// and src.NumRows() must equal this.NumRows()
-  void AddCols(const MatrixBase<Real> &src,
-               const MatrixIndexT *indices);
+  //void AddCols(const MatrixBase<Real> &src,
+   //            const MatrixIndexT *indices);

  /// Copies row r of this matrix from an array of floats at the location given
  /// by src[r]. If any src[r] is NULL then this.Row(r) will be set to zero.
@ -314,30 +289,30 @@ class MatrixBase {
  /// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]).
  /// If indexes[r] < 0, does not add anything. all elements of "indexes" must
  /// be in [-1, src.NumRows()-1], and src.NumCols() must equal this.NumCols().
-  void AddRows(Real alpha,
-               const MatrixBase<Real> &src,
-               const MatrixIndexT *indexes);
+ // void AddRows(Real alpha,
+  //             const MatrixBase<Real> &src,
+   //            const MatrixIndexT *indexes);

  /// Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as the
  /// beginning of a region of memory representing a vector of floats, of the
  /// same length as this.NumCols(). If src[r] is NULL, does not add anything.
-  void AddRows(Real alpha, const Real *const *src);
+  //void AddRows(Real alpha, const Real *const *src);

  /// For each row r of this matrix, adds it (times alpha) to the array of
  /// floats at the location given by dst[r]. If dst[r] is NULL, does not do
  /// anything for that row. Requires that none of the memory regions pointed
  /// to by the pointers in "dst" overlap (e.g. none of the pointers should be
  /// the same).
-  void AddToRows(Real alpha, Real *const *dst) const;
+  //void AddToRows(Real alpha, Real *const *dst) const;

  /// For each row i of *this, adds this->Row(i) to
  /// dst->Row(indexes(i)) if indexes(i) >= 0, else do nothing.
  /// Requires that all the indexes[i] that are >= 0
  /// be distinct, otherwise the behavior is undefined.
-  void AddToRows(Real alpha,
-                 const MatrixIndexT *indexes,
-                 MatrixBase<Real> *dst) const;
-
+  //void AddToRows(Real alpha,
+   //              const MatrixIndexT *indexes,
+    //             MatrixBase<Real> *dst) const;
+/*
  inline void ApplyPow(Real power) {
    this -> Pow(*this, power);
  }
@ -374,7 +349,7 @@ class MatrixBase {
  inline void ApplyLog() {
    this -> Log(*this);
  }
-
+*/
  /// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D
  /// P^{-1}.  Be careful: the relationship of D to the eigenvalues we output is
  /// slightly complicated, due to the need for P to be real.  In the symmetric
@ -389,9 +364,9 @@ class MatrixBase {
  /// instead (*this) P = P D.
  ///
  /// The non-member function CreateEigenvalueMatrix creates D from eigs_real and eigs_imag.
-  void Eig(MatrixBase<Real> *P,
-           VectorBase<Real> *eigs_real,
-           VectorBase<Real> *eigs_imag) const;
+  //void Eig(MatrixBase<Real> *P,
+   //        VectorBase<Real> *eigs_real,
+    //       VectorBase<Real> *eigs_imag) const;

  /// The Power method attempts to take the matrix to a power using a method that
  /// works in general for fractional and negative powers.  The input matrix must
@ -400,7 +375,7 @@ class MatrixBase {
  /// return false and leave the matrix unchanged, if at entry the matrix had
  /// real negative eigenvalues (or if it had zero eigenvalues and the power was
  /// negative).
-  bool Power(Real pow);
+//  bool Power(Real pow);

  /** Singular value decomposition
     Major limitations:
@ -413,31 +388,32 @@ class MatrixBase {
     expect that S.Dim() == m, U is either NULL or m by n,
     and v is either NULL or n by n.
     The singular values are not sorted (use SortSvd for that).  */
-  void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
-                      MatrixBase<Real> *Vt);  // Destroys calling matrix.
+  //void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
+   //                   MatrixBase<Real> *Vt);  // Destroys calling matrix.

  /// Compute SVD (*this) = U diag(s) Vt.   Note that the V in the call is already
  /// transposed; the normal formulation is U diag(s) V^T.
  /// Null pointers for U or V mean we don't want that output (this saves
  /// compute).  The singular values are not sorted (use SortSvd for that).
-  void Svd(VectorBase<Real> *s, MatrixBase<Real> *U,
-           MatrixBase<Real> *Vt) const;
+  //void Svd(VectorBase<Real> *s, MatrixBase<Real> *U,
+   //        MatrixBase<Real> *Vt) const;
  /// Compute SVD but only retain the singular values.
-  void Svd(VectorBase<Real> *s) const { Svd(s, NULL, NULL); }
+  //void Svd(VectorBase<Real> *s) const { Svd(s, NULL, NULL); }


  /// Returns smallest singular value.
-  Real MinSingularValue() const {
-    Vector<Real> tmp(std::min(NumRows(), NumCols()));
-    Svd(&tmp);
-    return tmp.Min();
-  }
+  //Real MinSingularValue() const {
+   // Vector<Real> tmp(std::min(NumRows(), NumCols()));
+    //Svd(&tmp);
+    //return tmp.Min();
+  //}

-  void TestUninitialized() const; // This function is designed so that if any element
+  //void TestUninitialized() const; // This function is designed so that if any element
   // of the matrix is uninitialized memory, valgrind will complain.

  /// Returns condition number by computing Svd.  Works even if cols > rows.
  /// Returns infinity if all singular values are zero.
+  /*
  Real Cond() const;

  /// Returns true if matrix is Symmetric.
@ -559,7 +535,7 @@ class MatrixBase {
  // element-by-element, set *this = diff * (1.0 - value^2).
  void DiffTanh(const MatrixBase<Real> &value,
                const MatrixBase<Real> &diff);
-
+*/
  /** Uses Svd to compute the eigenvalue decomposition of a symmetric positive
   * semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an
   * orthogonal matrix so rP^{-1} = rP^T.   Throws exception if input was not
@ -571,208 +547,15 @@ class MatrixBase {
   * SpMatrix and use Eig() function there, which uses eigenvalue decomposition
   * directly rather than SVD.
  */
-  void SymPosSemiDefEig(VectorBase<Real> *s, MatrixBase<Real> *P,
-                        Real check_thresh = 0.001);
-
-  friend Real kaldi::TraceMatMat<Real>(const MatrixBase<Real> &A,
-      const MatrixBase<Real> &B, MatrixTransposeType trans);  // tr (A B)
-
-  // so it can get around const restrictions on the pointer to data_.
-  friend class SubMatrix<Real>;
-
-  /// Add a scalar to each element
-  void Add(const Real alpha);
-
-  /// Add a scalar to each diagonal element.
-  void AddToDiag(const Real alpha);
-
-  /// *this += alpha * a * b^T
-  template<typename OtherReal>
-  void AddVecVec(const Real alpha, const VectorBase<OtherReal> &a,
-                 const VectorBase<OtherReal> &b);
-
-  /// [each row of *this] += alpha * v
-  template<typename OtherReal>
-  void AddVecToRows(const Real alpha, const VectorBase<OtherReal> &v);
-
-  /// [each col of *this] += alpha * v
-  template<typename OtherReal>
-  void AddVecToCols(const Real alpha, const VectorBase<OtherReal> &v);
-
-  /// *this += alpha * M [or M^T]
-  void AddMat(const Real alpha, const MatrixBase<Real> &M,
-              MatrixTransposeType transA = kNoTrans);
-
-  /// *this += alpha * A [or A^T].
-  void AddSmat(Real alpha, const SparseMatrix<Real> &A,
-               MatrixTransposeType trans = kNoTrans);
-
-  /// (*this) = alpha * op(A) * B + beta * (*this), where A is sparse.
-  /// Multiplication of sparse with dense matrix.  See also AddMatSmat.
-  void AddSmatMat(Real alpha, const SparseMatrix<Real> &A,
-                  MatrixTransposeType transA, const MatrixBase<Real> &B,
-                  Real beta);
-
-  /// (*this) = alpha * A * op(B) + beta * (*this), where B is sparse
-  /// and op(B) is either B or trans(B) depending on the 'transB' argument.
-  /// This is multiplication of a dense by a sparse matrix.  See also
-  /// AddSmatMat.
-  void AddMatSmat(Real alpha, const MatrixBase<Real> &A,
-                  const SparseMatrix<Real> &B, MatrixTransposeType transB,
-                  Real beta);
-
-  /// *this = beta * *this + alpha * M M^T, for symmetric matrices.  It only
-  /// updates the lower triangle of *this.  It will leave the matrix asymmetric;
-  /// if you need it symmetric as a regular matrix, do CopyLowerToUpper().
-  void SymAddMat2(const Real alpha, const MatrixBase<Real> &M,
-                  MatrixTransposeType transA, Real beta);
-
-  /// *this = beta * *this + alpha * diag(v) * M [or M^T].
-  /// The same as adding M but scaling each row M_i by v(i).
-  void AddDiagVecMat(const Real alpha, const VectorBase<Real> &v,
-                     const MatrixBase<Real> &M, MatrixTransposeType transM,
-                     Real beta = 1.0);
-
-  /// *this = beta * *this + alpha * M [or M^T] * diag(v)
-  /// The same as adding M but scaling each column M_j by v(j).
-  void AddMatDiagVec(const Real alpha,
-                     const MatrixBase<Real> &M, MatrixTransposeType transM,
-                     VectorBase<Real> &v,
-                     Real beta = 1.0);
-
-  /// *this = beta * *this + alpha * A .* B (.* element by element multiplication)
-  void AddMatMatElements(const Real alpha,
-                         const MatrixBase<Real>& A,
-                         const MatrixBase<Real>& B,
-                         const Real beta);
-
-  /// *this += alpha * S
-  template<typename OtherReal>
-  void AddSp(const Real alpha, const SpMatrix<OtherReal> &S);
-
-  void AddMatMat(const Real alpha,
-                 const MatrixBase<Real>& A, MatrixTransposeType transA,
-                 const MatrixBase<Real>& B, MatrixTransposeType transB,
-                 const Real beta);
-
-  /// *this = a * b / c (by element; when c = 0, *this = a)
-  void SetMatMatDivMat(const MatrixBase<Real>& A,
-                       const MatrixBase<Real>& B,
-                       const MatrixBase<Real>& C);
-
-  /// A version of AddMatMat specialized for when the second argument
-  /// contains a lot of zeroes.
-  void AddMatSmat(const Real alpha,
-                  const MatrixBase<Real>& A, MatrixTransposeType transA,
-                  const MatrixBase<Real>& B, MatrixTransposeType transB,
-                  const Real beta);
-
-  /// A version of AddMatMat specialized for when the first argument
-  /// contains a lot of zeroes.
-  void AddSmatMat(const Real alpha,
-                  const MatrixBase<Real>& A, MatrixTransposeType transA,
-                  const MatrixBase<Real>& B, MatrixTransposeType transB,
-                  const Real beta);
-
-  /// this <-- beta*this + alpha*A*B*C.
-  void AddMatMatMat(const Real alpha,
-                    const MatrixBase<Real>& A, MatrixTransposeType transA,
-                    const MatrixBase<Real>& B, MatrixTransposeType transB,
-                    const MatrixBase<Real>& C, MatrixTransposeType transC,
-                    const Real beta);
-
-  /// this <-- beta*this + alpha*SpA*B.
-  // This and the routines below are really
-  // stubs that need to be made more efficient.
-  void AddSpMat(const Real alpha,
-                const SpMatrix<Real>& A,
-                const MatrixBase<Real>& B, MatrixTransposeType transB,
-                const Real beta) {
-    Matrix<Real> M(A);
-    return AddMatMat(alpha, M, kNoTrans, B, transB, beta);
-  }
-  /// this <-- beta*this + alpha*A*B.
-  void AddTpMat(const Real alpha,
-                const TpMatrix<Real>& A, MatrixTransposeType transA,
-                const MatrixBase<Real>& B, MatrixTransposeType transB,
-                const Real beta) {
-    Matrix<Real> M(A);
-    return AddMatMat(alpha, M, transA, B, transB, beta);
-  }
-  /// this <-- beta*this + alpha*A*B.
-  void AddMatSp(const Real alpha,
-                const MatrixBase<Real>& A, MatrixTransposeType transA,
-                const SpMatrix<Real>& B,
-                const Real beta) {
-    Matrix<Real> M(B);
-    return AddMatMat(alpha, A, transA, M, kNoTrans, beta);
-  }
-  /// this <-- beta*this + alpha*A*B*C.
-  void AddSpMatSp(const Real alpha,
-                  const SpMatrix<Real> &A,
-                  const MatrixBase<Real>& B, MatrixTransposeType transB,
-                  const SpMatrix<Real>& C,
-                const Real beta) {
-    Matrix<Real> M(A), N(C);
-    return AddMatMatMat(alpha, M, kNoTrans, B, transB, N, kNoTrans, beta);
-  }
-  /// this <-- beta*this + alpha*A*B.
-  void AddMatTp(const Real alpha,
-                const MatrixBase<Real>& A, MatrixTransposeType transA,
-                const TpMatrix<Real>& B, MatrixTransposeType transB,
-                const Real beta) {
-    Matrix<Real> M(B);
-    return AddMatMat(alpha, A, transA, M, transB, beta);
-  }
-
-  /// this <-- beta*this + alpha*A*B.
-  void AddTpTp(const Real alpha,
-               const TpMatrix<Real>& A, MatrixTransposeType transA,
-               const TpMatrix<Real>& B, MatrixTransposeType transB,
-               const Real beta) {
-    Matrix<Real> M(A), N(B);
-    return AddMatMat(alpha, M, transA, N, transB, beta);
-  }
-
-  /// this <-- beta*this + alpha*A*B.
-  // This one is more efficient, not like the others above.
-  void AddSpSp(const Real alpha,
-               const SpMatrix<Real>& A, const SpMatrix<Real>& B,
-               const Real beta);
-
-  /// Copy lower triangle to upper triangle (symmetrize)
-  void CopyLowerToUpper();
-
-  /// Copy upper triangle to lower triangle (symmetrize)
-  void CopyUpperToLower();
-
-  /// This function orthogonalizes the rows of a matrix using the Gram-Schmidt
-  /// process.  It is only applicable if NumRows() <= NumCols().  It will use
-  /// random number generation to fill in rows with something nonzero, in cases
-  /// where the original matrix was of deficient row rank.
-  void OrthogonalizeRows();

  /// stream read.
  /// Use instead of stream<<*this, if you want to add to existing contents.
  // Will throw exception on failure.
-  void Read(std::istream & in, bool binary, bool add = false);
+  void Read(std::istream & in, bool binary);
  /// write to stream.
  void Write(std::ostream & out, bool binary) const;

  // Below is internal methods for Svd, user does not have to know about this.
-#if !defined(HAVE_ATLAS) && !defined(USE_KALDI_SVD)
-  // protected:
-  // Should be protected but used directly in testing routine.
-  // destroys *this!
-  void LapackGesvd(VectorBase<Real> *s, MatrixBase<Real> *U,
-                     MatrixBase<Real> *Vt);
-#else
- protected:
-  // destroys *this!
-  bool JamaSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
-               MatrixBase<Real> *V);
-
-#endif
 protected:

  ///  Initializer, callable only from child.
@ -827,19 +610,9 @@ class Matrix : public MatrixBase<Real> {
         MatrixStrideType stride_type = kDefaultStride):
      MatrixBase<Real>() { Resize(r, c, resize_type, stride_type); }

-  /// Copy constructor from CUDA matrix
-  /// This is defined in ../cudamatrix/cu-matrix.h
-  template<typename OtherReal>
-  explicit Matrix(const CuMatrixBase<OtherReal> &cu,
-                  MatrixTransposeType trans = kNoTrans);
-
-
  /// Swaps the contents of *this and *other.  Shallow swap.
  void Swap(Matrix<Real> *other);

-  /// Defined in ../cudamatrix/cu-matrix.cc
-  void Swap(CuMatrix<Real> *mat);
-
  /// Constructor from any MatrixBase. Can also copy with transpose.
  /// Allocates new memory.
  explicit Matrix(const MatrixBase<Real> & M,
@ -853,40 +626,29 @@ class Matrix : public MatrixBase<Real> {
  explicit Matrix(const MatrixBase<OtherReal> & M,
                    MatrixTransposeType trans = kNoTrans);

-  /// Copy constructor taking SpMatrix...
-  /// It is symmetric, so no option for transpose, and NumRows == Cols
-  template<typename OtherReal>
-  explicit Matrix(const SpMatrix<OtherReal> & M) : MatrixBase<Real>() {
-    Resize(M.NumRows(), M.NumRows(), kUndefined);
-    this->CopyFromSp(M);
-  }
-
-  /// Constructor from CompressedMatrix
-  explicit Matrix(const CompressedMatrix &C);
-
  /// Copy constructor taking TpMatrix...
-  template <typename OtherReal>
-  explicit Matrix(const TpMatrix<OtherReal> & M,
-                  MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
-    if (trans == kNoTrans) {
-      Resize(M.NumRows(), M.NumCols(), kUndefined);
-      this->CopyFromTp(M);
-    } else {
-      Resize(M.NumCols(), M.NumRows(), kUndefined);
-      this->CopyFromTp(M, kTrans);
-    }
-  }
+  //template <typename OtherReal>
+  //explicit Matrix(const TpMatrix<OtherReal> & M,
+                  //MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
+    //if (trans == kNoTrans) {
+      //Resize(M.NumRows(), M.NumCols(), kUndefined);
+      //this->CopyFromTp(M);
+    //} else {
+      //Resize(M.NumCols(), M.NumRows(), kUndefined);
+      //this->CopyFromTp(M, kTrans);
+    //}
+  //}

  /// read from stream.
  // Unlike one in base, allows resizing.
-  void Read(std::istream & in, bool binary, bool add = false);
+  void Read(std::istream & in, bool binary);

  /// Remove a specified row.
  void RemoveRow(MatrixIndexT i);

  /// Transpose the matrix.  Works for non-square
  /// matrices as well as square ones.
-  void Transpose();
+  //void Transpose();

   /// Destructor to free matrices.
  ~Matrix() { Destroy(); }
@ -947,37 +709,6 @@ class Matrix : public MatrixBase<Real> {

 /// A structure containing the HTK header.
 /// [TODO: change the style of the variables to Kaldi-compliant]
-struct HtkHeader {
-  /// Number of samples.
-  int32    mNSamples;
-  /// Sample period.
-  int32    mSamplePeriod;
-  /// Sample size
-  int16    mSampleSize;
-  /// Sample kind.
-  uint16   mSampleKind;
-};
-
-// Read HTK formatted features from file into matrix.
-template<typename Real>
-bool ReadHtk(std::istream &is, Matrix<Real> *M, HtkHeader *header_ptr);
-
-// Write (HTK format) features to file from matrix.
-template<typename Real>
-bool WriteHtk(std::ostream &os, const MatrixBase<Real> &M, HtkHeader htk_hdr);
-
-// Write (CMUSphinx format) features to file from matrix.
-template<typename Real>
-bool WriteSphinx(std::ostream &os, const MatrixBase<Real> &M);
-
-/// @} end of "addtogroup matrix_funcs_io"
-
-/**
-  Sub-matrix representation.
-  Can work with sub-parts of a matrix using this class.
-  Note that SubMatrix is not very const-correct-- it allows you to
-  change the contents of a const Matrix.  Be careful!
-*/

 template<typename Real>
 class SubMatrix : public MatrixBase<Real> {
@ -1012,6 +743,7 @@ class SubMatrix : public MatrixBase<Real> {
  /// Disallow assignment.
  SubMatrix<Real> &operator = (const SubMatrix<Real> &other);
 };
+
 /// @} End of "addtogroup matrix_funcs_io".

 /// \addtogroup matrix_funcs_scalar
@ -1019,7 +751,7 @@ class SubMatrix : public MatrixBase<Real> {

 // Some declarations.  These are traces of products.

-
+/************************
 template<typename Real>
 bool ApproxEqual(const MatrixBase<Real> &A,
                 const MatrixBase<Real> &B, Real tol = 0.01) {
@ -1085,7 +817,7 @@ void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real>
 template<typename Real>
 bool AttemptComplexPower(Real *x_re, Real *x_im, Real power);

-
+**********/

 /// @} end of addtogroup matrix_funcs_misc

@ -1101,7 +833,6 @@ std::istream & operator >> (std::istream & In, MatrixBase<Real> & M);
 template<typename Real>
 std::istream & operator >> (std::istream & In, Matrix<Real> & M);

-
 template<typename Real>
 bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) {
  return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols());
--- a/speechx/speechx/common/matrix/kaldi-vector-inl.h
+++ b/speechx/speechx/common/matrix/kaldi-vector-inl.h
@ -44,14 +44,14 @@ std::istream &operator >> (std::istream &is, Vector<Real> &rv) {
  return is;
 }

-template<>
-template<>
-void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv);
-
-template<>
-template<>
-void VectorBase<double>::AddVec<double>(const double alpha,
-                                        const VectorBase<double> &rv);
+//template<>
+//template<>
+//void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv);
+
+//template<>
+//template<>
+//void VectorBase<double>::AddVec<double>(const double alpha,
+                                        //const VectorBase<double> &rv);

 }  // namespace kaldi

--- a/speechx/speechx/common/matrix/kaldi-vector.cc
+++ b/speechx/speechx/common/matrix/kaldi-vector.cc
--- a/speechx/speechx/common/matrix/kaldi-vector.h
+++ b/speechx/speechx/common/matrix/kaldi-vector.h
@ -0,0 +1,345 @@
+// matrix/kaldi-vector.h
+
+// Copyright 2009-2012   Ondrej Glembek;  Microsoft Corporation;  Lukas Burget;
+//                       Saarland University (Author: Arnab Ghoshal);
+//                       Ariya Rastrow;  Petr Schwarz;  Yanmin Qian;
+//                       Karel Vesely;  Go Vivace Inc.;  Arnab Ghoshal
+//                       Wei Shi;
+//                2015   Guoguo Chen
+//                2017   Daniel Galvez
+//                2019   Yiwen Shao
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_MATRIX_KALDI_VECTOR_H_
+#define KALDI_MATRIX_KALDI_VECTOR_H_ 1
+
+#include "matrix/matrix-common.h"
+
+namespace kaldi {
+
+/// \addtogroup matrix_group
+/// @{
+
+///  Provides a vector abstraction class.
+///  This class provides a way to work with vectors in kaldi.
+///  It encapsulates basic operations and memory optimizations.
+template<typename Real>
+class VectorBase {
+ public:
+  /// Set vector to all zeros.
+  void SetZero();
+
+  /// Returns true if vector is all zeros.
+  bool IsZero(Real cutoff = 1.0e-06) const;     // replace magic number
+
+  /// Set all members of a vector to a specified value.
+  void Set(Real f);
+
+  /// Returns the dimension of the vector.
+  inline MatrixIndexT Dim() const { return dim_; }
+
+  /// Returns the size in memory of the vector, in bytes.
+  inline MatrixIndexT SizeInBytes() const { return (dim_*sizeof(Real)); }
+
+  /// Returns a pointer to the start of the vector's data.
+  inline Real* Data() { return data_; }
+
+  /// Returns a pointer to the start of the vector's data (const).
+  inline const Real* Data() const { return data_; }
+
+  /// Indexing operator (const).
+  inline Real operator() (MatrixIndexT i) const {
+    KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
+                 static_cast<UnsignedMatrixIndexT>(dim_));
+    return *(data_ + i);
+  }
+
+  /// Indexing operator (non-const).
+  inline Real & operator() (MatrixIndexT i) {
+    KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
+                 static_cast<UnsignedMatrixIndexT>(dim_));
+    return *(data_ + i);
+  }
+
+  /** @brief Returns a sub-vector of a vector (a range of elements).
+   *  @param o [in] Origin, 0 <= o <= Dim()
+   *  @param l [in] Length, 0 <= l <= Dim()-o
+   *  @return A SubVector object that aliases the data of the Vector object.
+   *  See @c SubVector class for details   */
+  SubVector<Real> Range(const MatrixIndexT o, const MatrixIndexT l) {
+    return SubVector<Real>(*this, o, l);
+  }
+
+  /** @brief Returns a const sub-vector of a vector (a range of elements).
+   *  @param o [in] Origin, 0 <= o <= Dim()
+   *  @param l [in] Length, 0 <= l <= Dim()-o
+   *  @return A SubVector object that aliases the data of the Vector object.
+   *  See @c SubVector class for details   */
+  const SubVector<Real> Range(const MatrixIndexT o,
+                              const MatrixIndexT l) const {
+    return SubVector<Real>(*this, o, l);
+  }
+
+  /// Copy data from another vector (must match own size).
+  void CopyFromVec(const VectorBase<Real> &v);
+
+  /// Copy data from another vector of different type (double vs. float)
+  template<typename OtherReal>
+  void CopyFromVec(const VectorBase<OtherReal> &v);
+
+  /// Performs a row stack of the matrix M
+  void CopyRowsFromMat(const MatrixBase<Real> &M);
+  template<typename OtherReal>
+  void CopyRowsFromMat(const MatrixBase<OtherReal> &M);
+
+  /// Performs a column stack of the matrix M
+  void CopyColsFromMat(const MatrixBase<Real> &M);
+
+  /// Extracts a row of the matrix M.  Could also do this with
+  /// this->Copy(M[row]).
+  void CopyRowFromMat(const MatrixBase<Real> &M, MatrixIndexT row);
+  /// Extracts a row of the matrix M with type conversion.
+  template<typename OtherReal>
+  void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row);
+
+  /// Extracts a column of the matrix M.
+  template<typename OtherReal>
+  void CopyColFromMat(const MatrixBase<OtherReal> &M , MatrixIndexT col);
+
+  /// Reads from C++ stream (option to read in binary).
+  /// Throws exception on failure
+  void Read(std::istream &in, bool binary);
+
+  /// Writes to C++ stream (option to write in binary).
+  void Write(std::ostream &Out, bool binary) const;
+
+  friend class VectorBase<double>;
+  friend class VectorBase<float>;
+ protected:
+  /// Destructor;  does not deallocate memory, this is handled by child classes.
+  /// This destructor is protected so this object can only be
+  /// deleted via a child.
+  ~VectorBase() {}
+
+  /// Empty initializer, corresponds to vector of zero size.
+  explicit VectorBase(): data_(NULL), dim_(0) {
+    KALDI_ASSERT_IS_FLOATING_TYPE(Real);
+  }
+
+  /// data memory area
+  Real* data_;
+  /// dimension of vector
+  MatrixIndexT dim_;
+  KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase);
+}; // class VectorBase
+
+/** @brief A class representing a vector.
+ *
+ *  This class provides a way to work with vectors in kaldi.
+ *  It encapsulates basic operations and memory optimizations.  */
+template<typename Real>
+class Vector: public VectorBase<Real> {
+ public:
+  /// Constructor that takes no arguments.  Initializes to empty.
+  Vector(): VectorBase<Real>() {}
+
+  /// Constructor with specific size.  Sets to all-zero by default
+  /// if set_zero == false, memory contents are undefined.
+  explicit Vector(const MatrixIndexT s,
+                  MatrixResizeType resize_type = kSetZero)
+      : VectorBase<Real>() {  Resize(s, resize_type);  }
+
+  /// Copy constructor from CUDA vector
+  /// This is defined in ../cudamatrix/cu-vector.h
+  //template<typename OtherReal>
+  //explicit Vector(const CuVectorBase<OtherReal> &cu);
+
+  /// Copy constructor.  The need for this is controversial.
+  Vector(const Vector<Real> &v) : VectorBase<Real>()  { //  (cannot be explicit)
+    Resize(v.Dim(), kUndefined);
+    this->CopyFromVec(v);
+  }
+
+  /// Copy-constructor from base-class, needed to copy from SubVector.
+  explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() {
+    Resize(v.Dim(), kUndefined);
+    this->CopyFromVec(v);
+  }
+
+  /// Type conversion constructor.
+  template<typename OtherReal>
+  explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() {
+    Resize(v.Dim(), kUndefined);
+    this->CopyFromVec(v);
+  }
+
+// Took this out since it is unsafe : Arnab
+//  /// Constructor from a pointer and a size; copies the data to a location
+//  /// it owns.
+//  Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() {
+//    Resize(s);
+  //    CopyFromPtr(Data, s);
+//  }
+
+
+  /// Swaps the contents of *this and *other.  Shallow swap.
+  void Swap(Vector<Real> *other);
+
+  /// Destructor.  Deallocates memory.
+  ~Vector() { Destroy(); }
+
+  /// Read function using C++ streams.  Can also add to existing contents
+  /// of matrix.
+  void Read(std::istream &in, bool binary);
+
+  /// Set vector to a specified size (can be zero).
+  /// The value of the new data depends on resize_type:
+  ///   -if kSetZero, the new data will be zero
+  ///   -if kUndefined, the new data will be undefined
+  ///   -if kCopyData, the new data will be the same as the old data in any
+  ///      shared positions, and zero elsewhere.
+  /// This function takes time proportional to the number of data elements.
+  void Resize(MatrixIndexT length, MatrixResizeType resize_type = kSetZero);
+
+  /// Remove one element and shifts later elements down.
+  void RemoveElement(MatrixIndexT i);
+
+  /// Assignment operator.  Resizes this vector to other's dimension and
+  /// copies other's elements into it.
+  /// @param other  Vector to copy from.
+  /// @return       Reference to *this.
+  Vector<Real> &operator = (const Vector<Real> &other) {
+    // Self-assignment guard: there is nothing to copy, and returning early
+    // avoids relying on Resize()/CopyFromVec() tolerating source == target.
+    if (this == &other) return *this;
+    Resize(other.Dim(), kUndefined);  // contents are overwritten just below
+    this->CopyFromVec(other);
+    return *this;
+  }
+
+  /// Assignment operator that takes VectorBase (e.g. a SubVector).  Resizes
+  /// this vector to other's dimension and copies other's elements into it.
+  /// @param other  Vector to copy from.
+  /// @return       Reference to *this.
+  /// NOTE(review): if `other` is a view into this vector's own storage with a
+  /// different dimension, Resize() may invalidate that view before the copy
+  /// runs -- confirm against Resize()'s implementation before relying on it.
+  Vector<Real> &operator = (const VectorBase<Real> &other) {
+    // Self-assignment guard (other is this very object seen through its
+    // base class): nothing to copy.
+    if (this == &other) return *this;
+    Resize(other.Dim(), kUndefined);  // contents are overwritten just below
+    this->CopyFromVec(other);
+    return *this;
+  }
+ private:
+  /// Init assumes the current contents of the class are invalid (i.e. junk or
+  /// has already been freed), and it sets the vector to newly allocated memory
+  /// with the specified dimension.  dim == 0 is acceptable.  The memory contents
+  /// pointed to by data_ will be undefined.
+  void Init(const MatrixIndexT dim);
+
+  /// Destroy function, called internally.
+  void Destroy();
+
+};
+
+
+/// Represents a non-allocating general vector which can be defined
+/// as a sub-vector of higher-level vector [or as the row of a matrix].
+/// A SubVector only records a data pointer and a dimension that refer to
+/// memory owned elsewhere; its destructor does nothing.
+template<typename Real>
+class SubVector : public VectorBase<Real> {
+ public:
+  /// Constructor from a Vector or SubVector.
+  /// SubVectors are not const-safe and it's very hard to make them
+  /// so for now we just give up.  This function contains const_cast.
+  /// @param t       Vector whose storage is viewed (aliased, not copied).
+  /// @param origin  Index in t of the first element of the view.
+  /// @param length  Number of elements in the view.
+  SubVector(const VectorBase<Real> &t, const MatrixIndexT origin,
+            const MatrixIndexT length) : VectorBase<Real>() {
+    // Require origin >= 0 && length >= 0 && origin + length <= t.Dim().
+    // Written with a subtraction on the right-hand side so that no sum can
+    // wrap around: adding the two values after casting them to unsigned would
+    // accept e.g. origin == -1 together with length == 1.
+    KALDI_ASSERT(origin >= 0 && length >= 0 &&
+                 length <= t.Dim() - origin);
+    VectorBase<Real>::data_ = const_cast<Real*> (t.Data()+origin);
+    VectorBase<Real>::dim_   = length;
+  }
+
+  // Disabled: constructor that would initialize the vector to point at the
+  // contents of a packed matrix (SpMatrix or TpMatrix).  Plain comments are
+  // used so that Doxygen does not attach this text to the copy constructor.
+ // SubVector(const PackedMatrix<Real> &M) {
+    //VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
+    //VectorBase<Real>::dim_   = (M.NumRows()*(M.NumRows()+1))/2;
+  //}
+
+  /// Copy constructor.  Shallow: the new SubVector refers to the same data
+  /// pointer and dimension as other.
+  SubVector(const SubVector &other) : VectorBase<Real> () {
+    // this copy constructor needed for Range() to work in base class.
+    VectorBase<Real>::data_ = other.data_;
+    VectorBase<Real>::dim_ = other.dim_;
+  }
+
+  /// Constructor from a pointer to memory and a length.  Keeps a pointer
+  /// to the data but does not take ownership (will never delete).
+  /// Caution: this constructor enables you to evade const constraints.
+  /// @param data    First element of the memory to view.
+  /// @param length  Number of elements in the view.
+  SubVector(const Real *data, MatrixIndexT length) : VectorBase<Real> () {
+    VectorBase<Real>::data_ = const_cast<Real*>(data);
+    VectorBase<Real>::dim_   = length;
+  }
+
+  /// Views one row of a matrix: the data pointer is matrix.RowData(row) and
+  /// the dimension is matrix.NumCols().
+  /// This operation does not preserve const-ness, so be careful.
+  SubVector(const MatrixBase<Real> &matrix, MatrixIndexT row) {
+    VectorBase<Real>::data_ = const_cast<Real*>(matrix.RowData(row));
+    VectorBase<Real>::dim_   = matrix.NumCols();
+  }
+
+  ~SubVector() {}  ///< Destructor (does nothing; no pointers are owned here).
+
+ private:
+  /// Disallow assignment operator.  Declared but intentionally left
+  /// undefined: the previous empty body fell off the end of a function with
+  /// a non-void return type, which is undefined behaviour if ever invoked.
+  SubVector & operator = (const SubVector &other);
+};
+
+/// @} end of "addtogroup matrix_group"
+/// \addtogroup matrix_funcs_io
+/// @{
+/// Output to a C++ stream.  Non-binary by default (use Write for
+/// binary output).
+template<typename Real>
+std::ostream & operator << (std::ostream & out, const VectorBase<Real> & v);
+
+/// Input from a C++ stream.  Will automatically read text or
+/// binary data from the stream.
+template<typename Real>
+std::istream & operator >> (std::istream & in, VectorBase<Real> & v);
+
+/// Input from a C++ stream. Will automatically read text or
+/// binary data from the stream.
+template<typename Real>
+std::istream & operator >> (std::istream & in, Vector<Real> & v);
+/// @} end of \addtogroup matrix_funcs_io
+
+/// \addtogroup matrix_funcs_scalar
+/// @{
+
+
+//template<typename Real>
+//bool ApproxEqual(const VectorBase<Real> &a,
+                 //const VectorBase<Real> &b, Real tol = 0.01) {
+  //return a.ApproxEqual(b, tol);
+//}
+
+//template<typename Real>
+//inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
+                        //float tol = 0.01) {
+  //KALDI_ASSERT(a.ApproxEqual(b, tol));
+//}
+
+/// @} end of "addtogroup matrix_funcs_scalar"
+
+
+
+}  // namespace kaldi
+
+// we need to include the implementation
+#include "matrix/kaldi-vector-inl.h"
+
+
+
+#endif  // KALDI_MATRIX_KALDI_VECTOR_H_
--- a/speechx/speechx/common/matrix/matrix-common.h
+++ b/speechx/speechx/common/matrix/matrix-common.h
@ -59,26 +59,7 @@ template<typename Real> class SubVector;
 template<typename Real> class MatrixBase;
 template<typename Real> class SubMatrix;
 template<typename Real> class Matrix;
-template<typename Real> class SpMatrix;
-template<typename Real> class TpMatrix;
-template<typename Real> class PackedMatrix;
-template<typename Real> class SparseMatrix;
-
-// these are classes that won't be defined in this
-// directory; they're mostly needed for friend declarations.
-template<typename Real> class CuMatrixBase;
-template<typename Real> class CuSubMatrix;
-template<typename Real> class CuMatrix;
-template<typename Real> class CuVectorBase;
-template<typename Real> class CuSubVector;
-template<typename Real> class CuVector;
-template<typename Real> class CuPackedMatrix;
-template<typename Real> class CuSpMatrix;
-template<typename Real> class CuTpMatrix;
-template<typename Real> class CuSparseMatrix;
-
-class CompressedMatrix;
-class GeneralMatrix;
+

 /// This class provides a way for switching between double and float types.
 template<typename T> class OtherReal { };  // useful in reading+writing routines
--- a/speechx/speechx/kaldi/CMakeLists.txt
+++ b/speechx/speechx/kaldi/CMakeLists.txt
@ -5,8 +5,6 @@ ${CMAKE_CURRENT_SOURCE_DIR}

 add_subdirectory(base)
 add_subdirectory(util)
-add_subdirectory(feat)
-add_subdirectory(matrix)
 add_subdirectory(lat)
 add_subdirectory(fstext)
 add_subdirectory(decoder)
--- a/speechx/speechx/kaldi/feat/CMakeLists.txt
+++ b/speechx/speechx/kaldi/feat/CMakeLists.txt
@ -1,20 +0,0 @@
-add_library(kaldi-mfcc
-  feature-mfcc.cc
-)
-target_link_libraries(kaldi-mfcc PUBLIC kaldi-feat-common)
-
-add_library(kaldi-fbank
-  feature-fbank.cc
-)
-target_link_libraries(kaldi-fbank PUBLIC kaldi-feat-common)
-
-add_library(kaldi-feat-common
-  wave-reader.cc
-  signal.cc
-  feature-functions.cc
-  feature-window.cc
-  resample.cc
-  mel-computations.cc
-  cmvn.cc
-)
-target_link_libraries(kaldi-feat-common PUBLIC kaldi-base kaldi-matrix kaldi-util)
--- a/speechx/speechx/kaldi/feat/cmvn.cc
+++ b/speechx/speechx/kaldi/feat/cmvn.cc
@ -1,183 +0,0 @@
-// transform/cmvn.cc
-
-// Copyright 2009-2013 Microsoft Corporation
-//                     Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#include "feat/cmvn.h"
-
-namespace kaldi {
-
-void InitCmvnStats(int32 dim, Matrix<double> *stats) {
-  KALDI_ASSERT(dim > 0);
-  stats->Resize(2, dim+1);
-}
-
-void AccCmvnStats(const VectorBase<BaseFloat> &feats, BaseFloat weight, MatrixBase<double> *stats) {
-  int32 dim = feats.Dim();
-  KALDI_ASSERT(stats != NULL);
-  KALDI_ASSERT(stats->NumRows() == 2 && stats->NumCols() == dim + 1);
-  // Remove these __restrict__ modifiers if they cause compilation problems.
-  // It's just an optimization.
-   double *__restrict__ mean_ptr = stats->RowData(0),
-       *__restrict__ var_ptr = stats->RowData(1),
-       *__restrict__ count_ptr = mean_ptr + dim;
-   const BaseFloat * __restrict__ feats_ptr = feats.Data();
-  *count_ptr += weight;
-  // Careful-- if we change the format of the matrix, the "mean_ptr < count_ptr"
-  // statement below might become wrong.
-  for (; mean_ptr < count_ptr; mean_ptr++, var_ptr++, feats_ptr++) {
-    *mean_ptr += *feats_ptr * weight;
-    *var_ptr +=  *feats_ptr * *feats_ptr * weight;
-  }
-}
-
-void AccCmvnStats(const MatrixBase<BaseFloat> &feats,
-                  const VectorBase<BaseFloat> *weights,
-                  MatrixBase<double> *stats) {
-  int32 num_frames = feats.NumRows();
-  if (weights != NULL) {
-    KALDI_ASSERT(weights->Dim() == num_frames);
-  }
-  for (int32 i = 0; i < num_frames; i++) {
-    SubVector<BaseFloat> this_frame = feats.Row(i);
-    BaseFloat weight = (weights == NULL ? 1.0 : (*weights)(i));
-    if (weight != 0.0)
-      AccCmvnStats(this_frame, weight, stats);
-  }
-}
-
-void ApplyCmvn(const MatrixBase<double> &stats,
-               bool var_norm,
-               MatrixBase<BaseFloat> *feats) {
-  KALDI_ASSERT(feats != NULL);
-  int32 dim = stats.NumCols() - 1;
-  if (stats.NumRows() > 2 || stats.NumRows() < 1 || feats->NumCols() != dim) {
-    KALDI_ERR << "Dim mismatch: cmvn "
-              << stats.NumRows() << 'x' << stats.NumCols()
-              << ", feats " << feats->NumRows() << 'x' << feats->NumCols();
-  }
-  if (stats.NumRows() == 1 && var_norm)
-    KALDI_ERR << "You requested variance normalization but no variance stats "
-              << "are supplied.";
-
-  double count = stats(0, dim);
-  // Do not change the threshold of 1.0 here: in the balanced-cmvn code, when
-  // computing an offset and representing it as stats, we use a count of one.
-  if (count < 1.0)
-    KALDI_ERR << "Insufficient stats for cepstral mean and variance normalization: "
-              << "count = " << count;
-
-  if (!var_norm) {
-    Vector<BaseFloat> offset(dim);
-    SubVector<double> mean_stats(stats.RowData(0), dim);
-    offset.AddVec(-1.0 / count, mean_stats);
-    feats->AddVecToRows(1.0, offset);
-    return;
-  }
-  // norm(0, d) = mean offset;
-  // norm(1, d) = scale, e.g. x(d) <-- x(d)*norm(1, d) + norm(0, d).
-  Matrix<BaseFloat> norm(2, dim);
-  for (int32 d = 0; d < dim; d++) {
-    double mean, offset, scale;
-    mean = stats(0, d)/count;
-    double var = (stats(1, d)/count) - mean*mean,
-        floor = 1.0e-20;
-    if (var < floor) {
-      KALDI_WARN << "Flooring cepstral variance from " << var << " to "
-                 << floor;
-      var = floor;
-    }
-    scale = 1.0 / sqrt(var);
-    if (scale != scale || 1/scale == 0.0)
-      KALDI_ERR << "NaN or infinity in cepstral mean/variance computation";
-    offset = -(mean*scale);
-    norm(0, d) = offset;
-    norm(1, d) = scale;
-  }
-  // Apply the normalization.
-  feats->MulColsVec(norm.Row(1));
-  feats->AddVecToRows(1.0, norm.Row(0));
-}
-
-void ApplyCmvnReverse(const MatrixBase<double> &stats,
-                      bool var_norm,
-                      MatrixBase<BaseFloat> *feats) {
-  KALDI_ASSERT(feats != NULL);
-  int32 dim = stats.NumCols() - 1;
-  if (stats.NumRows() > 2 || stats.NumRows() < 1 || feats->NumCols() != dim) {
-    KALDI_ERR << "Dim mismatch: cmvn "
-              << stats.NumRows() << 'x' << stats.NumCols()
-              << ", feats " << feats->NumRows() << 'x' << feats->NumCols();
-  }
-  if (stats.NumRows() == 1 && var_norm)
-    KALDI_ERR << "You requested variance normalization but no variance stats "
-              << "are supplied.";
-
-  double count = stats(0, dim);
-  // Do not change the threshold of 1.0 here: in the balanced-cmvn code, when
-  // computing an offset and representing it as stats, we use a count of one.
-  if (count < 1.0)
-    KALDI_ERR << "Insufficient stats for cepstral mean and variance normalization: "
-              << "count = " << count;
-
-  Matrix<BaseFloat> norm(2, dim);  // norm(0, d) = mean offset
-  // norm(1, d) = scale, e.g. x(d) <-- x(d)*norm(1, d) + norm(0, d).
-  for (int32 d = 0; d < dim; d++) {
-    double mean, offset, scale;
-    mean = stats(0, d) / count;
-    if (!var_norm) {
-      scale = 1.0;
-      offset = mean;
-    } else {
-      double var = (stats(1, d)/count) - mean*mean,
-          floor = 1.0e-20;
-      if (var < floor) {
-        KALDI_WARN << "Flooring cepstral variance from " << var << " to "
-                   << floor;
-        var = floor;
-      }
-      // we aim to transform zero-mean, unit-variance input into data
-      // with the given mean and variance.
-      scale = sqrt(var);
-      offset = mean;
-    }
-    norm(0, d) = offset;
-    norm(1, d) = scale;
-  }
-  if (var_norm)
-    feats->MulColsVec(norm.Row(1));
-  feats->AddVecToRows(1.0, norm.Row(0));
-}
-
-
-void FakeStatsForSomeDims(const std::vector<int32> &dims,
-                          MatrixBase<double> *stats) {
-  KALDI_ASSERT(stats->NumRows() == 2 && stats->NumCols() > 1);
-  int32 dim = stats->NumCols() - 1;
-  double count = (*stats)(0, dim);
-  for (size_t i = 0; i < dims.size(); i++) {
-    int32 d = dims[i];
-    KALDI_ASSERT(d >= 0 && d < dim);
-    (*stats)(0, d) = 0.0;
-    (*stats)(1, d) = count;
-  }
-}
-
-
-
-}  // namespace kaldi
--- a/speechx/speechx/kaldi/feat/cmvn.h
+++ b/speechx/speechx/kaldi/feat/cmvn.h
@ -1,75 +0,0 @@
-// transform/cmvn.h
-
-// Copyright 2009-2013 Microsoft Corporation
-//                     Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_TRANSFORM_CMVN_H_
-#define KALDI_TRANSFORM_CMVN_H_
-
-#include "base/kaldi-common.h"
-#include "matrix/matrix-lib.h"
-
-namespace kaldi {
-
-/// This function initializes the matrix to dimension 2 by (dim+1);
-/// 1st "dim" elements of 1st row are mean stats, 1st "dim" elements
-/// of 2nd row are var stats, last element of 1st row is count,
-/// last element of 2nd row is zero.
-void InitCmvnStats(int32 dim, Matrix<double> *stats);
-
-/// Accumulation from a single frame (weighted).
-void AccCmvnStats(const VectorBase<BaseFloat> &feat,
-                  BaseFloat weight,
-                  MatrixBase<double> *stats);
-
-/// Accumulation from a feature file (possibly weighted-- useful in excluding silence).
-void AccCmvnStats(const MatrixBase<BaseFloat> &feats,
-                  const VectorBase<BaseFloat> *weights,  // or NULL
-                  MatrixBase<double> *stats);
-
-/// Apply cepstral mean and variance normalization to a matrix of features.
-/// If norm_vars == true, expects stats to be of dimension 2 by (dim+1), but
-/// if norm_vars == false, will accept stats of dimension 1 by (dim+1); these
-/// are produced by the balanced-cmvn code when it computes an offset and
-/// represents it as "fake stats".
-void ApplyCmvn(const MatrixBase<double> &stats,
-               bool norm_vars,
-               MatrixBase<BaseFloat> *feats);
-
-/// This is as ApplyCmvn, but does so in the reverse sense, i.e. applies a transform
-/// that would take zero-mean, unit-variance input and turn it into output with the
-/// stats of "stats".  This can be useful if you trained without CMVN but later want
-/// to correct a mismatch, so you would first apply CMVN and then do the "reverse"
-/// CMVN with the summed stats of your training data.
-void ApplyCmvnReverse(const MatrixBase<double> &stats,
-                      bool norm_vars,
-                      MatrixBase<BaseFloat> *feats);
-
-
-/// Modify the stats so that for some dimensions (specified in "dims"), we
-/// replace them with "fake" stats that have zero mean and unit variance; this
-/// is done to disable CMVN for those dimensions.
-void FakeStatsForSomeDims(const std::vector<int32> &dims,
-                          MatrixBase<double> *stats);
-
-
-
-}  // namespace kaldi
-
-#endif  // KALDI_TRANSFORM_CMVN_H_
--- a/speechx/speechx/kaldi/feat/feature-common-inl.h
+++ b/speechx/speechx/kaldi/feat/feature-common-inl.h
@ -1,99 +0,0 @@
-// feat/feature-common-inl.h
-
-// Copyright       2016  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_FEATURE_COMMON_INL_H_
-#define KALDI_FEAT_FEATURE_COMMON_INL_H_
-
-#include "feat/resample.h"
-// Do not include this file directly.  It is included by feat/feature-common.h
-
-namespace kaldi {
-
-template <class F>
-void OfflineFeatureTpl<F>::ComputeFeatures(
-    const VectorBase<BaseFloat> &wave,
-    BaseFloat sample_freq,
-    BaseFloat vtln_warp,
-    Matrix<BaseFloat> *output) {
-  KALDI_ASSERT(output != NULL);
-  BaseFloat new_sample_freq = computer_.GetFrameOptions().samp_freq;
-  if (sample_freq == new_sample_freq) {
-    Compute(wave, vtln_warp, output);
-  } else {
-    if (new_sample_freq < sample_freq &&
-        ! computer_.GetFrameOptions().allow_downsample)
-        KALDI_ERR << "Waveform and config sample Frequency mismatch: "
-                  << sample_freq << " .vs " << new_sample_freq
-                  << " (use --allow-downsample=true to allow "
-                  << " downsampling the waveform).";
-    else if (new_sample_freq > sample_freq &&
-             ! computer_.GetFrameOptions().allow_upsample)
-      KALDI_ERR << "Waveform and config sample Frequency mismatch: "
-                  << sample_freq << " .vs " << new_sample_freq
-                << " (use --allow-upsample=true option to allow "
-                << " upsampling the waveform).";
-    // Resample the waveform.
-    Vector<BaseFloat> resampled_wave(wave);
-    ResampleWaveform(sample_freq, wave,
-                     new_sample_freq, &resampled_wave);
-    Compute(resampled_wave, vtln_warp, output);
-  }
-}
-
-template <class F>
-void OfflineFeatureTpl<F>::Compute(
-    const VectorBase<BaseFloat> &wave,
-    BaseFloat vtln_warp,
-    Matrix<BaseFloat> *output) {
-  KALDI_ASSERT(output != NULL);
-  int32 rows_out = NumFrames(wave.Dim(), computer_.GetFrameOptions()),
-      cols_out = computer_.Dim();
-  if (rows_out == 0) {
-    output->Resize(0, 0);
-    return;
-  }
-  output->Resize(rows_out, cols_out);
-  Vector<BaseFloat> window;  // windowed waveform.
-  bool use_raw_log_energy = computer_.NeedRawLogEnergy();
-  for (int32 r = 0; r < rows_out; r++) {  // r is frame index.
-    BaseFloat raw_log_energy = 0.0;
-    ExtractWindow(0, wave, r, computer_.GetFrameOptions(),
-                  feature_window_function_, &window,
-                  (use_raw_log_energy ? &raw_log_energy : NULL));
-
-    SubVector<BaseFloat> output_row(*output, r);
-    computer_.Compute(raw_log_energy, vtln_warp, &window, &output_row);
-  }
-}
-
-template <class F>
-void OfflineFeatureTpl<F>::Compute(
-    const VectorBase<BaseFloat> &wave,
-    BaseFloat vtln_warp,
-    Matrix<BaseFloat> *output) const {
-  OfflineFeatureTpl<F> temp(*this);
-  // call the non-const version of Compute() on a temporary copy of this object.
-  // This is a workaround for const-ness that may sometimes be useful in
-  // multi-threaded code, although it's not optimally efficient.
-  temp.Compute(wave, vtln_warp, output);
-}
-
-} // end namespace kaldi
-
-#endif
--- a/speechx/speechx/kaldi/feat/feature-common.h
+++ b/speechx/speechx/kaldi/feat/feature-common.h
@ -1,176 +0,0 @@
-// feat/feature-common.h
-
-// Copyright      2016   Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABILITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_FEATURE_COMMON_H_
-#define KALDI_FEAT_FEATURE_COMMON_H_
-
-#include <map>
-#include <string>
-#include "feat/feature-window.h"
-
-namespace kaldi {
-/// @addtogroup  feat FeatureExtraction
-/// @{
-
-
-
-/// This class is only added for documentation, it is not intended to ever be
-/// used.
-struct ExampleFeatureComputerOptions {
-  FrameExtractionOptions frame_opts;
-  // .. more would go here.
-};
-
-/// This class is only added for documentation, it is not intended to ever be
-/// used.  It documents the interface of the *Computer classes which wrap the
-/// low-level feature extraction.  The template argument F of OfflineFeatureTpl must
-/// follow this interface.  This interface is intended for features such as
-/// MFCCs and PLPs which can be computed frame by frame.
-class ExampleFeatureComputer {
- public:
-  typedef ExampleFeatureComputerOptions Options;
-
-  /// Returns a reference to the frame-extraction options class, which
-  /// will be part of our own options class.
-  const FrameExtractionOptions &GetFrameOptions() const {
-    return opts_.frame_opts;
-  }
-
-  /// Returns the feature dimension
-  int32 Dim() const;
-
-  /// Returns true if this function may inspect the raw log-energy of the signal
-  /// (before windowing and pre-emphasis); it's safe to always return true, but
-  /// setting it to false enables an optimization.
-  bool NeedRawLogEnergy() const { return true; }
-
-  /// constructor from options class; it should not store a reference or pointer
-  /// to the options class but should copy it.
-  explicit ExampleFeatureComputer(const ExampleFeatureComputerOptions &opts):
-      opts_(opts) { }
-
-  /// Copy constructor; all of these classes must have one.
-  ExampleFeatureComputer(const ExampleFeatureComputer &other);
-
-  /**
-     Function that computes one frame of features from
-     one frame of signal.
-
-     @param [in] signal_raw_log_energy The log-energy of the frame of the signal
-         prior to windowing and pre-emphasis, or
-         log(numeric_limits<float>::min()), whichever is greater.  Must be
-         ignored by this function if this class returns false from
-         this->NeedRawLogEnergy().
-     @param [in] vtln_warp  The VTLN warping factor that the user wants
-         to be applied when computing features for this utterance.  Will
-         normally be 1.0, meaning no warping is to be done.  The value will
-         be ignored for feature types that don't support VLTN, such as
-         spectrogram features.
-     @param [in] signal_frame  One frame of the signal,
-       as extracted using the function ExtractWindow() using the options
-       returned by this->GetFrameOptions().  The function will use the
-       vector as a workspace, which is why it's a non-const pointer.
-     @param [out] feature  Pointer to a vector of size this->Dim(), to which
-         the computed feature will be written.
-  */
-  void Compute(BaseFloat signal_raw_log_energy,
-               BaseFloat vtln_warp,
-               VectorBase<BaseFloat> *signal_frame,
-               VectorBase<BaseFloat> *feature);
-
- private:
-  // disallow assignment.
-  ExampleFeatureComputer &operator = (const ExampleFeatureComputer &in);
-  Options opts_;
-};
-
-
-/// This templated class is intended for offline feature extraction, i.e. where
-/// you have access to the entire signal at the start.  It exists mainly to be
-/// drop-in replacement for the old (pre-2016) classes Mfcc, Plp and so on, for
-/// use in the offline case.  In April 2016 we reorganized the online
-/// feature-computation code for greater modularity and to have correct support
-/// for the snip-edges=false option.
-template <class F>
-class OfflineFeatureTpl {
- public:
-  typedef typename F::Options Options;
-
-  // Note: feature_window_function_ is the windowing function, which initialized
-  // using the options class, that we cache at this level.
-  OfflineFeatureTpl(const Options &opts):
-      computer_(opts),
-      feature_window_function_(computer_.GetFrameOptions()) { }
-
-  // Internal (and back-compatibility) interface for computing features, which
-  // requires that the user has already checked that the sampling frequency
-  // of the waveform is equal to the sampling frequency specified in
-  // the frame-extraction options.
-  void Compute(const VectorBase<BaseFloat> &wave,
-               BaseFloat vtln_warp,
-               Matrix<BaseFloat> *output);
-
-  // This const version of Compute() is a wrapper that
-  // calls the non-const version on a temporary object.
-  // It's less efficient than the non-const version.
-  void Compute(const VectorBase<BaseFloat> &wave,
-               BaseFloat vtln_warp,
-               Matrix<BaseFloat> *output) const;
-
-  /**
-     Computes the features for one file (one sequence of features).
-     This is the newer interface where you specify the sample frequency
-     of the input waveform.
-       @param [in] wave   The input waveform
-       @param [in] sample_freq  The sampling frequency with which
-                                'wave' was sampled.
-                                if sample_freq is higher than the frequency
-                                specified in the config, we will downsample
-                                the waveform, but if lower, it's an error.
-     @param [in] vtln_warp  The VTLN warping factor (will normally
-                            be 1.0)
-     @param [out]  output  The matrix of features, where the row-index
-                           is the frame index.
-  */
-  void ComputeFeatures(const VectorBase<BaseFloat> &wave,
-                       BaseFloat sample_freq,
-                       BaseFloat vtln_warp,
-                       Matrix<BaseFloat> *output);
-
-  int32 Dim() const { return computer_.Dim(); }
-
-  // Copy constructor.
-  OfflineFeatureTpl(const OfflineFeatureTpl<F> &other):
-      computer_(other.computer_),
-      feature_window_function_(other.feature_window_function_) { }
-  private:
-  // Disallow assignment.
-  OfflineFeatureTpl<F> &operator =(const OfflineFeatureTpl<F> &other);
-
-  F computer_;
-  FeatureWindowFunction feature_window_function_;
-};
-
-/// @} End of "addtogroup feat"
-}  // namespace kaldi
-
-
-#include "feat/feature-common-inl.h"
-
-#endif  // KALDI_FEAT_FEATURE_COMMON_H_
--- a/speechx/speechx/kaldi/feat/feature-fbank.cc
+++ b/speechx/speechx/kaldi/feat/feature-fbank.cc
@ -1,125 +0,0 @@
-// feat/feature-fbank.cc
-
-// Copyright 2009-2012  Karel Vesely
-//                2016  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "feat/feature-fbank.h"
-
-namespace kaldi {
-
-FbankComputer::FbankComputer(const FbankOptions &opts):
-    opts_(opts), srfft_(NULL) {
-  if (opts.energy_floor > 0.0)
-    log_energy_floor_ = Log(opts.energy_floor);
-
-  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
-  if ((padded_window_size & (padded_window_size-1)) == 0)  // Is a power of two...
-    srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
-
-  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
-  // [note: this call caches it.]
-  GetMelBanks(1.0);
-}
-
-FbankComputer::FbankComputer(const FbankComputer &other):
-    opts_(other.opts_), log_energy_floor_(other.log_energy_floor_),
-    mel_banks_(other.mel_banks_), srfft_(NULL) {
-  for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
-      iter != mel_banks_.end();
-      ++iter)
-    iter->second = new MelBanks(*(iter->second));
-  if (other.srfft_)
-    srfft_ = new SplitRadixRealFft<BaseFloat>(*(other.srfft_));
-}
-
-FbankComputer::~FbankComputer() {
-  for (std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.begin();
-      iter != mel_banks_.end(); ++iter)
-    delete iter->second;
-  delete srfft_;
-}
-
-const MelBanks* FbankComputer::GetMelBanks(BaseFloat vtln_warp) {
-  MelBanks *this_mel_banks = NULL;
-  std::map<BaseFloat, MelBanks*>::iterator iter = mel_banks_.find(vtln_warp);
-  if (iter == mel_banks_.end()) {
-    this_mel_banks = new MelBanks(opts_.mel_opts,
-                                  opts_.frame_opts,
-                                  vtln_warp);
-    mel_banks_[vtln_warp] = this_mel_banks;
-  } else {
-    this_mel_banks = iter->second;
-  }
-  return this_mel_banks;
-}
-
-void FbankComputer::Compute(BaseFloat signal_raw_log_energy,
-                            BaseFloat vtln_warp,
-                            VectorBase<BaseFloat> *signal_frame,
-                            VectorBase<BaseFloat> *feature) {
-
-  const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));
-
-  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
-               feature->Dim() == this->Dim());
-
-
-  // Compute energy after window function (not the raw one).
-  if (opts_.use_energy && !opts_.raw_energy)
-    signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
-                                     std::numeric_limits<float>::epsilon()));
-
-  if (srfft_ != NULL)  // Compute FFT using split-radix algorithm.
-    srfft_->Compute(signal_frame->Data(), true);
-  else  // An alternative algorithm that works for non-powers-of-two.
-    RealFft(signal_frame, true);
-
-  // Convert the FFT into a power spectrum.
-  ComputePowerSpectrum(signal_frame);
-  SubVector<BaseFloat> power_spectrum(*signal_frame, 0,
-                                      signal_frame->Dim() / 2 + 1);
-
-  // Use magnitude instead of power if requested.
-  if (!opts_.use_power)
-    power_spectrum.ApplyPow(0.5);
-
-  int32 mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0);
-  SubVector<BaseFloat> mel_energies(*feature,
-                                    mel_offset,
-                                    opts_.mel_opts.num_bins);
-
-  // Sum with mel fiterbanks over the power spectrum
-  mel_banks.Compute(power_spectrum, &mel_energies);
-  if (opts_.use_log_fbank) {
-    // Avoid log of zero (which should be prevented anyway by dithering).
-    mel_energies.ApplyFloor(std::numeric_limits<float>::epsilon());
-    mel_energies.ApplyLog();  // take the log.
-  }
-
-  // Copy energy as first value (or the last, if htk_compat == true).
-  if (opts_.use_energy) {
-    if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) {
-      signal_raw_log_energy = log_energy_floor_;
-    }
-    int32 energy_index = opts_.htk_compat ? opts_.mel_opts.num_bins : 0;
-    (*feature)(energy_index) = signal_raw_log_energy;
-  }
-}
-
-}  // namespace kaldi
--- a/speechx/speechx/kaldi/feat/feature-fbank.h
+++ b/speechx/speechx/kaldi/feat/feature-fbank.h
@ -1,149 +0,0 @@
-// feat/feature-fbank.h
-
-// Copyright 2009-2012  Karel Vesely
-//                2016  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_FEATURE_FBANK_H_
-#define KALDI_FEAT_FEATURE_FBANK_H_
-
-#include <map>
-#include <string>
-
-#include "feat/feature-common.h"
-#include "feat/feature-functions.h"
-#include "feat/feature-window.h"
-#include "feat/mel-computations.h"
-
-namespace kaldi {
-/// @addtogroup  feat FeatureExtraction
-/// @{
-
-
-/// FbankOptions contains basic options for computing filterbank features.
-/// It only includes things that can be done in a "stateless" way, i.e.
-/// it does not include energy max-normalization.
-/// It does not include delta computation.
-/// Every field is exposed as a configurable option through Register().
-struct FbankOptions {
-  // Frame extraction options; registered first in Register().
-  FrameExtractionOptions frame_opts;
-  // Mel filterbank options; constructed with 23 bins by default (see below).
-  MelBanksOptions mel_opts;
-  bool use_energy;  // append an extra dimension with energy to the filter banks
-  // Floor on the (absolute) energy; only relevant when use_energy is true.
-  // Defaults to 0.0.
-  BaseFloat energy_floor;
-  bool raw_energy;  // If true, compute energy before preemphasis and windowing
-  bool htk_compat;  // If true, put energy last (if using energy)
-  bool use_log_fbank;  // if true (default), produce log-filterbank, else linear
-  bool use_power;  // if true (default), use power in filterbank analysis, else magnitude.
-
-  // Default-constructs the options: 23 mel bins, no energy dimension,
-  // no energy floor, raw energy, non-HTK layout, log filterbank, power spectrum.
-  FbankOptions(): mel_opts(23),
-                 // defaults the #mel-banks to 23 for the FBANK computations.
-                 // this seems to be common for 16khz-sampled data,
-                 // but for 8khz-sampled data, 15 may be better.
-                 use_energy(false),
-                 energy_floor(0.0),
-                 raw_energy(true),
-                 htk_compat(false),
-                 use_log_fbank(true),
-                 use_power(true) {}
-
-  // Registers the nested frame and mel options followed by this struct's
-  // own fields with the given options interface.
-  void Register(OptionsItf *opts) {
-    frame_opts.Register(opts);
-    mel_opts.Register(opts);
-    opts->Register("use-energy", &use_energy,
-                   "Add an extra dimension with energy to the FBANK output.");
-    opts->Register("energy-floor", &energy_floor,
-                   "Floor on energy (absolute, not relative) in FBANK computation. "
-                   "Only makes a difference if --use-energy=true; only necessary if "
-                   "--dither=0.0.  Suggested values: 0.1 or 1.0");
-    opts->Register("raw-energy", &raw_energy,
-                   "If true, compute energy before preemphasis and windowing");
-    opts->Register("htk-compat", &htk_compat, "If true, put energy last.  "
-                   "Warning: not sufficient to get HTK compatible features (need "
-                   "to change other parameters).");
-    opts->Register("use-log-fbank", &use_log_fbank,
-                   "If true, produce log-filterbank, else produce linear.");
-    opts->Register("use-power", &use_power,
-                   "If true, use power, else use magnitude.");
-  }
-};
-
-
-/// Class for computing mel-filterbank features; see \ref feat_mfcc for more
-/// information.
-class FbankComputer {
- public:
-  typedef FbankOptions Options;
-
-  explicit FbankComputer(const FbankOptions &opts);
-  FbankComputer(const FbankComputer &other);
-
-  /// Dimension of one output feature vector: the number of mel bins, plus
-  /// one extra element when the energy is appended (opts_.use_energy).
-  int32 Dim() const {
-    return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
-  }
-
-  /// True if Compute() makes use of its signal_raw_log_energy argument,
-  /// i.e. energy is requested and it is to be taken from the raw signal.
-  bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
-
-  /// Frame extraction options that the input frames are expected to follow.
-  const FrameExtractionOptions &GetFrameOptions() const {
-    return opts_.frame_opts;
-  }
-
-  /**
-     Function that computes one frame of features from
-     one frame of signal.
-
-     @param [in] signal_raw_log_energy The log-energy of the frame of the signal
-         prior to windowing and pre-emphasis, or
-         log(numeric_limits<float>::min()), whichever is greater.  Must be
-         ignored by this function if this class returns false from
-         this->NeedRawLogEnergy().
-     @param [in] vtln_warp  The VTLN warping factor that the user wants
-         to be applied when computing features for this utterance.  Will
-         normally be 1.0, meaning no warping is to be done.  The value will
-         be ignored for feature types that don't support VTLN, such as
-         spectrogram features.
-     @param [in] signal_frame  One frame of the signal,
-       as extracted using the function ExtractWindow() using the options
-       returned by this->GetFrameOptions().  The function will use the
-       vector as a workspace, which is why it's a non-const pointer.
-     @param [out] feature  Pointer to a vector of size this->Dim(), to which
-         the computed feature will be written.
-  */
-  void Compute(BaseFloat signal_raw_log_energy,
-               BaseFloat vtln_warp,
-               VectorBase<BaseFloat> *signal_frame,
-               VectorBase<BaseFloat> *feature);
-
-  ~FbankComputer();
-
-  /// Returns the mel banks to use for the given VTLN warp factor.
-  /// NOTE(review): presumably looked up in / added to mel_banks_; the
-  /// definition is not visible here -- confirm.
-  const MelBanks *GetMelBanks(BaseFloat vtln_warp);
- private:
-
-
-  FbankOptions opts_;
-  // Energy floor in the log domain; Compute() applies it to the energy
-  // element when opts_.energy_floor > 0.
-  BaseFloat log_energy_floor_;
-  std::map<BaseFloat, MelBanks*> mel_banks_;  // BaseFloat is VTLN coefficient.
-  // Split-radix FFT object; when NULL, Compute() falls back to RealFft().
-  SplitRadixRealFft<BaseFloat> *srfft_;
-  // Disallow assignment.
-  FbankComputer &operator =(const FbankComputer &other);
-};
-
-typedef OfflineFeatureTpl<FbankComputer> Fbank;
-
-/// @} End of "addtogroup feat"
-}  // namespace kaldi
-
-
-#endif  // KALDI_FEAT_FEATURE_FBANK_H_
--- a/speechx/speechx/kaldi/feat/feature-functions.cc
+++ b/speechx/speechx/kaldi/feat/feature-functions.cc
@ -1,362 +0,0 @@
-// feat/feature-functions.cc
-
-// Copyright 2009-2011  Karel Vesely;  Petr Motlicek;  Microsoft Corporation
-//                2013  Johns Hopkins University (author: Daniel Povey)
-//                2014  IMSL, PKU-HKUST (author: Wei Shi)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "feat/feature-functions.h"
-#include "matrix/matrix-functions.h"
-
-
-namespace kaldi {
-
-// Converts, in place, the packed output of a real FFT into a power spectrum.
-//
-// On entry *waveform holds the first half of the complex spectrum, stored as
-// [real0, realN/2, real1, im1, real2, im2, ...].  On exit elements
-// 0 .. dim/2 hold the per-bin energies (real^2 + im^2); the contents of the
-// remaining elements are unspecified.
-//
-// The dimension does not have to be a power of two, but it must be at least
-// 2 because elements 0 and 1 are always read.
-void ComputePowerSpectrum(VectorBase<BaseFloat> *waveform) {
-  int32 dim = waveform->Dim();
-  // Elements 0 and 1 (the two purely real terms) are read unconditionally.
-  KALDI_ASSERT(dim >= 2);
-
-  int32 half_dim = dim/2;
-  // Save the two special-case energies first: slot 1 is overwritten by the
-  // loop below (bin 1), and slot 0 must not be disturbed before it is read.
-  BaseFloat first_energy = (*waveform)(0) * (*waveform)(0),
-      last_energy = (*waveform)(1) * (*waveform)(1);  // handle this special case
-  // Writing bin i after reading elements 2i and 2i+1 is safe in place,
-  // because every later iteration reads only from indices >= 2(i+1).
-  for (int32 i = 1; i < half_dim; i++) {
-    BaseFloat real = (*waveform)(i*2), im = (*waveform)(i*2 + 1);
-    (*waveform)(i) = real*real + im*im;
-  }
-  (*waveform)(0) = first_energy;
-  (*waveform)(half_dim) = last_energy;  // Will actually never be used, and anyway
-  // if the signal has been bandlimited sensibly this should be zero.
-}
-
-
-// Builds the scaling window for every delta order up to opts.order.
-// scales_[0] is the trivial one-tap window (the baseline features); each
-// higher order is the previous order's window convolved with the linear ramp
-// j = -window .. window, divided by the sum of j^2 over that ramp.
-DeltaFeatures::DeltaFeatures(const DeltaFeaturesOptions &opts): opts_(opts) {
-  // Basic sanity checks, to make sure we were not handed binary junk.
-  // The order will normally be 2 or 3, and the window size normally 2.
-  KALDI_ASSERT(opts.order >= 0 && opts.order < 1000);
-  KALDI_ASSERT(opts.window > 0 && opts.window < 1000);
-
-  scales_.resize(opts.order+1);
-  scales_[0].Resize(1);
-  scales_[0](0) = 1.0;
-
-  for (int32 order = 1; order <= opts.order; order++) {
-    Vector<BaseFloat> &lower = scales_[order-1];
-    Vector<BaseFloat> &current = scales_[order];
-    // Kept as a local so the code still works if the window later becomes
-    // per-order.  The actual delta-window width is 2*window + 1.
-    int32 window = opts.window;
-    KALDI_ASSERT(window != 0);
-    int32 lower_center = (static_cast<int32>(lower.Dim()-1))/2;
-    int32 current_center = lower_center + window;
-    current.Resize(lower.Dim() + 2*window);  // also zeros it.
-
-    BaseFloat sum_of_squares = 0.0;
-    for (int32 tap = -window; tap <= window; tap++) {
-      sum_of_squares += tap*tap;
-      const BaseFloat ramp = static_cast<BaseFloat>(tap);
-      for (int32 k = -lower_center; k <= lower_center; k++) {
-        current(tap+k+current_center) += ramp * lower(k+lower_center);
-      }
-    }
-    current.Scale(1.0 / sum_of_squares);
-  }
-}
-
-// Computes the features plus deltas for a single frame.
-// For each order i, block i of *output_frame (feat_dim elements) receives the
-// weighted sum of the input rows around `frame`, using the window scales_[i].
-// Row indices outside the input are clamped to the first/last row.
-void DeltaFeatures::Process(const MatrixBase<BaseFloat> &input_feats,
-                            int32 frame,
-                            VectorBase<BaseFloat> *output_frame) const {
-  KALDI_ASSERT(frame < input_feats.NumRows());
-  int32 num_frames = input_feats.NumRows(),
-      feat_dim = input_feats.NumCols();
-  KALDI_ASSERT(static_cast<int32>(output_frame->Dim()) == feat_dim * (opts_.order+1));
-  output_frame->SetZero();
-  const int32 last_frame = num_frames - 1;
-  for (int32 order = 0; order <= opts_.order; order++) {
-    const Vector<BaseFloat> &window = scales_[order];
-    const int32 half_width = (window.Dim() - 1) / 2;
-    SubVector<BaseFloat> block(*output_frame, order*feat_dim, feat_dim);
-    for (int32 tap = -half_width; tap <= half_width; tap++) {
-      const BaseFloat weight = window(tap + half_width);
-      if (weight == 0.0) continue;  // nothing to add for a zero tap.
-      // Replicate the edge frames when the window runs off either end.
-      int32 src_frame = frame + tap;
-      if (src_frame < 0) {
-        src_frame = 0;
-      } else if (src_frame > last_frame) {
-        src_frame = last_frame;
-      }
-      block.AddVec(weight, input_feats.Row(src_frame));
-    }
-  }
-}
-
-// Builds the single delta window used for every block: a linear ramp
-// j = -window .. window divided by the sum of j^2.
-ShiftedDeltaFeatures::ShiftedDeltaFeatures(
-  const ShiftedDeltaFeaturesOptions &opts): opts_(opts) {
-  KALDI_ASSERT(opts.window > 0 && opts.window < 1000);
-
-  // Default window is 1.
-  int32 window = opts.window;
-  KALDI_ASSERT(window != 0);
-  const int32 num_taps = 1 + 2*window;
-  scales_.Resize(num_taps);  // also zeros it.
-  BaseFloat sum_of_squares = 0.0;
-  for (int32 idx = 0; idx < num_taps; idx++) {
-    const int32 tap = idx - window;  // runs from -window to +window.
-    sum_of_squares += tap*tap;
-    scales_(idx) += static_cast<BaseFloat>(tap);
-  }
-  scales_.Scale(1.0 / sum_of_squares);
-}
-
-// Computes shifted-delta features for one frame.
-//
-// *output_frame receives the input frame itself followed by
-// opts_.num_blocks delta blocks; block i applies the delta window scales_
-// around frame + i * opts_.block_shift.  Row indices that fall outside the
-// input are clamped to the first/last row.  output_frame must have
-// dimension feat_dim * (opts_.num_blocks + 1).
-void ShiftedDeltaFeatures::Process(const MatrixBase<BaseFloat> &input_feats,
-                            int32 frame,
-                            SubVector<BaseFloat> *output_frame) const {
-  KALDI_ASSERT(frame < input_feats.NumRows());
-  int32 num_frames = input_feats.NumRows(),
-      feat_dim = input_feats.NumCols();
-  KALDI_ASSERT(static_cast<int32>(output_frame->Dim())
-               == feat_dim * (opts_.num_blocks + 1));
-  output_frame->SetZero();
-
-  // The original features
-  SubVector<BaseFloat> static_output(*output_frame, 0, feat_dim);
-  static_output.AddVec(1.0, input_feats.Row(frame));
-
-  // The window is the same for every block, so its half-width is computed
-  // once rather than on each iteration.
-  const int32 max_offset = (scales_.Dim() - 1) / 2;
-
-  // Concatenate the delta-blocks. Each block is block_shift
-  // (usually 3) frames apart.
-  for (int32 i = 0; i < opts_.num_blocks; i++) {
-    // Distinct name: this used to shadow the sub-vector of the original
-    // features declared above.
-    SubVector<BaseFloat> block_output(*output_frame, (i + 1) * feat_dim,
-                                      feat_dim);
-    for (int32 j = -max_offset; j <= max_offset; j++) {
-      int32 offset_frame = frame + j + i * opts_.block_shift;
-      if (offset_frame < 0) offset_frame = 0;
-      else if (offset_frame >= num_frames)
-        offset_frame = num_frames - 1;
-      BaseFloat scale = scales_(j + max_offset);
-      if (scale != 0.0)
-        block_output.AddVec(scale, input_feats.Row(offset_frame));
-    }
-  }
-}
-
-// Convenience wrapper: resizes *output_features to hold, for every input
-// row, the features and their deltas up to delta_opts.order, then fills it
-// row by row using DeltaFeatures::Process().
-void ComputeDeltas(const DeltaFeaturesOptions &delta_opts,
-                   const MatrixBase<BaseFloat> &input_features,
-                   Matrix<BaseFloat> *output_features) {
-  const int32 blocks_per_row = delta_opts.order + 1;
-  output_features->Resize(input_features.NumRows(),
-                          input_features.NumCols() * blocks_per_row);
-  DeltaFeatures delta_computer(delta_opts);
-  const int32 num_rows = static_cast<int32>(input_features.NumRows());
-  for (int32 row_index = 0; row_index < num_rows; row_index++) {
-    SubVector<BaseFloat> out_row(*output_features, row_index);
-    delta_computer.Process(input_features, row_index, &out_row);
-  }
-}
-
-// Convenience wrapper: resizes *output_features to hold, for every input
-// row, the original features followed by delta_opts.num_blocks delta blocks,
-// then fills it row by row using ShiftedDeltaFeatures::Process().
-void ComputeShiftedDeltas(const ShiftedDeltaFeaturesOptions &delta_opts,
-                   const MatrixBase<BaseFloat> &input_features,
-                   Matrix<BaseFloat> *output_features) {
-  const int32 blocks_per_row = delta_opts.num_blocks + 1;
-  output_features->Resize(input_features.NumRows(),
-                          input_features.NumCols() * blocks_per_row);
-  ShiftedDeltaFeatures delta_computer(delta_opts);
-
-  const int32 num_rows = static_cast<int32>(input_features.NumRows());
-  for (int32 row_index = 0; row_index < num_rows; row_index++) {
-    SubVector<BaseFloat> out_row(*output_features, row_index);
-    delta_computer.Process(input_features, row_index, &out_row);
-  }
-}
-
-
-// Fills *mat_out (resized to n_bases x dimension) with scaled cosine bases.
-//
-// With angle = pi / (dimension - 1) and scale = 1 / (2 * (dimension - 1)),
-// row i is:
-//   column 0:               scale
-//   columns 1..dimension-2: 2 * scale * cos(angle * i * j)
-//   column dimension-1:     scale * cos(angle * i * (dimension - 1))
-//
-// dimension must be greater than 1: both angle and scale divide by
-// (dimension - 1), so dimension == 1 would produce inf/NaN entries.
-void InitIdftBases(int32 n_bases, int32 dimension, Matrix<BaseFloat> *mat_out) {
-  // Guard the divisions by (dimension - 1) below.
-  KALDI_ASSERT(dimension > 1);
-  BaseFloat angle = M_PI / static_cast<BaseFloat>(dimension - 1);
-  BaseFloat scale = 1.0f / (2.0 * static_cast<BaseFloat>(dimension - 1));
-  mat_out->Resize(n_bases, dimension);
-  for (int32 i = 0; i < n_bases; i++) {
-    // First column: cos(0) == 1, weighted by scale (not 2 * scale).
-    (*mat_out)(i, 0) = 1.0 * scale;
-    BaseFloat i_fl = static_cast<BaseFloat>(i);
-    // Interior columns carry twice the weight of the two end columns.
-    for (int32 j = 1; j < dimension - 1; j++) {
-      BaseFloat j_fl = static_cast<BaseFloat>(j);
-      (*mat_out)(i, j) = 2.0 * scale * cos(angle * i_fl * j_fl);
-    }
-
-    // Last column, weighted by scale like the first one.
-    (*mat_out)(i, dimension -1)
-        = scale * cos(angle * i_fl * static_cast<BaseFloat>(dimension-1));
-  }
-}
-
-// Splices each frame together with left_context preceding and right_context
-// following frames into one output row of width D * (1 + left + right).
-// Frames before the start / after the end are replaced by the first / last
-// frame.  Raises an error on empty input.
-void SpliceFrames(const MatrixBase<BaseFloat> &input_features,
-                  int32 left_context,
-                  int32 right_context,
-                  Matrix<BaseFloat> *output_features) {
-  int32 T = input_features.NumRows(), D = input_features.NumCols();
-  if (T == 0 || D == 0)
-    KALDI_ERR << "SpliceFrames: empty input";
-  KALDI_ASSERT(left_context >= 0 && right_context >= 0);
-  const int32 frames_per_row = 1 + left_context + right_context;
-  output_features->Resize(T, D*frames_per_row);
-  for (int32 out_t = 0; out_t < T; out_t++) {
-    SubVector<BaseFloat> spliced_row(*output_features, out_t);
-    for (int32 slot = 0; slot < frames_per_row; slot++) {
-      // Source frame for this slot, clamped to the valid range [0, T-1].
-      int32 src_t = out_t + slot - left_context;
-      if (src_t < 0) src_t = 0;
-      if (src_t >= T) src_t = T-1;
-      SubVector<BaseFloat> slot_dst(spliced_row, slot*D, D);
-      SubVector<BaseFloat> slot_src(input_features, src_t);
-      slot_dst.CopyFromVec(slot_src);
-    }
-  }
-}
-
-// Copies the input frames into *output_features in reverse time order
-// (output row t is input row T-1-t).  Raises an error on empty input.
-void ReverseFrames(const MatrixBase<BaseFloat> &input_features,
-                   Matrix<BaseFloat> *output_features) {
-  int32 T = input_features.NumRows(), D = input_features.NumCols();
-  if (T == 0 || D == 0)
-    KALDI_ERR << "ReverseFrames: empty input";
-  output_features->Resize(T, D);
-  int32 src_t = T - 1;
-  for (int32 dst_t = 0; dst_t < T; dst_t++, src_t--) {
-    SubVector<BaseFloat> reversed_row(*output_features, dst_t);
-    SubVector<BaseFloat> source_row(input_features, src_t);
-    reversed_row.CopyFromVec(source_row);
-  }
-}
-
-
-// Validates the option values; called at the start of
-// SlidingWindowCmnInternal().
-// NOTE(review): min_window is range-checked only when center is true, whereas
-// SlidingWindowCmnInternal() reads min_window only when center is false, and
-// the option help text says it is ignored when center==true -- confirm
-// whether the condition below is the intended one.
-void SlidingWindowCmnOptions::Check() const {
-  KALDI_ASSERT(cmn_window > 0);
-  if (center)
-    KALDI_ASSERT(min_window > 0 && min_window <= cmn_window);
-  // else ignored so value doesn't matter.
-}
-
-// Internal version of SlidingWindowCmn with double-precision arguments.
-// For every frame t it determines a window of frames, keeps running sums
-// (and, if variance normalization is on, sums of squares) over that window,
-// and writes to row t of *output the input row minus the window mean,
-// optionally scaled by the inverse standard deviation of the window.
-// *output is written row by row and is never resized here.
-void SlidingWindowCmnInternal(const SlidingWindowCmnOptions &opts,
-                              const MatrixBase<double> &input,
-                              MatrixBase<double> *output) {
-  opts.Check();
-  // last_window_start/end == -1 means "no window has been accumulated yet".
-  int32 num_frames = input.NumRows(), dim = input.NumCols(),
-        last_window_start = -1, last_window_end = -1,
-        warning_count = 0;
-  // Running per-dimension sum and sum of squares over the current window.
-  Vector<double> cur_sum(dim), cur_sumsq(dim);
-
-  for (int32 t = 0; t < num_frames; t++) {
-    int32 window_start, window_end; // note: window_end will be one
-    // past the end of the window we use for normalization.
-    if (opts.center) {
-      // Window of cmn_window frames positioned around t.
-      window_start = t - (opts.cmn_window / 2);
-      window_end = window_start + opts.cmn_window;
-    } else {
-      // Window to the left of t, ending at (and including) frame t.
-      window_start = t - opts.cmn_window;
-      window_end = t + 1;
-    }
-    if (window_start < 0) { // shift window right if starts <0.
-      window_end -= window_start;
-      window_start = 0; // or: window_start -= window_start
-    }
-    if (!opts.center) {
-      // In left-window mode the end is t + 1, or min_window if that is larger.
-      if (window_end > t)
-        window_end = std::max(t + 1, opts.min_window);
-    }
-    if (window_end > num_frames) {
-      // Shift the window left so that it ends at the last frame, and clamp
-      // the start if the input is shorter than the window.
-      window_start -= (window_end - num_frames);
-      window_end = num_frames;
-      if (window_start < 0) window_start = 0;
-    }
-    if (last_window_start == -1) {
-      // First frame: accumulate the statistics over the whole window.
-      SubMatrix<double> input_part(input,
-                                      window_start, window_end - window_start,
-                                      0, dim);
-      cur_sum.AddRowSumMat(1.0, input_part , 0.0);
-      if (opts.normalize_variance)
-        cur_sumsq.AddDiagMat2(1.0, input_part, kTrans, 0.0);
-    } else {
-      // Later frames: update the statistics incrementally.  The asserts
-      // require that each window edge advances by at most one frame.
-      if (window_start > last_window_start) {
-        KALDI_ASSERT(window_start == last_window_start + 1);
-        SubVector<double> frame_to_remove(input, last_window_start);
-        cur_sum.AddVec(-1.0, frame_to_remove);
-        if (opts.normalize_variance)
-          cur_sumsq.AddVec2(-1.0, frame_to_remove);
-      }
-      if (window_end > last_window_end) {
-        KALDI_ASSERT(window_end == last_window_end + 1);
-        SubVector<double> frame_to_add(input, last_window_end);
-        cur_sum.AddVec(1.0, frame_to_add);
-        if (opts.normalize_variance)
-          cur_sumsq.AddVec2(1.0, frame_to_add);
-      }
-    }
-    int32 window_frames = window_end - window_start;
-    last_window_start = window_start;
-    last_window_end = window_end;
-
-    KALDI_ASSERT(window_frames > 0);
-    // Mean normalization: output row = input row - (window sum / count).
-    SubVector<double> input_frame(input, t),
-        output_frame(*output, t);
-    output_frame.CopyFromVec(input_frame);
-    output_frame.AddVec(-1.0 / window_frames, cur_sum);
-
-    if (opts.normalize_variance) {
-      if (window_frames == 1) {
-        // A single-frame window: the output row is simply set to zero.
-        output_frame.Set(0.0);
-      } else {
-        Vector<double> variance(cur_sumsq);
-        variance.Scale(1.0 / window_frames);
-        variance.AddVec2(-1.0 / (window_frames * window_frames), cur_sum);
-        // now "variance" is the variance of the features in the window,
-        // around their own mean.
-        int32 num_floored;
-	variance.ApplyFloor(1.0e-10, &num_floored);
-        if (num_floored > 0 && num_frames > 1) {
-          // Exactly at the limit, emit one final "suppressing" message.
-          if (opts.max_warnings == warning_count) {
-            KALDI_WARN << "Suppressing the remaining variance flooring "
-                       << "warnings. Run program with --max-warnings=-1 to "
-                       << "see all warnings.";
-          }
-          // If opts.max_warnings is a negative number, we won't restrict the
-          // number of times that the warning is printed out.
-          else if (opts.max_warnings < 0
-                   || opts.max_warnings > warning_count) {
-            KALDI_WARN << "Flooring when normalizing variance, floored "
-                       << num_floored << " elements; num-frames was "
-                       << window_frames;
-          }
-          warning_count++;
-        }
-        variance.ApplyPow(-0.5); // get inverse standard deviation.
-        output_frame.MulElements(variance);
-      }
-    }
-  }
-}
-
-
-// Single-precision entry point: copies the input into a double-precision
-// matrix, runs SlidingWindowCmnInternal() on it and copies the result back
-// into *output.  input and *output must have the same dimensions and at
-// least one row.
-void SlidingWindowCmn(const SlidingWindowCmnOptions &opts,
-                      const MatrixBase<BaseFloat> &input,
-                      MatrixBase<BaseFloat> *output) {
-  KALDI_ASSERT(SameDim(input, *output) && input.NumRows() > 0);
-  Matrix<double> input_as_double(input);
-  Matrix<double> result_as_double(input.NumRows(), input.NumCols());
-  // The actual work is done in double precision.
-  SlidingWindowCmnInternal(opts, input_as_double, &result_as_double);
-  output->CopyFromMat(result_as_double);
-}
-
-
-
-}  // namespace kaldi
--- a/speechx/speechx/kaldi/feat/feature-functions.h
+++ b/speechx/speechx/kaldi/feat/feature-functions.h
@ -1,204 +0,0 @@
-// feat/feature-functions.h
-
-// Copyright 2009-2011  Karel Vesely;  Petr Motlicek;  Microsoft Corporation
-//                2014  IMSL, PKU-HKUST (author: Wei Shi)
-//                2016  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_FEAT_FEATURE_FUNCTIONS_H_
-#define KALDI_FEAT_FEATURE_FUNCTIONS_H_
-
-#include <string>
-#include <vector>
-
-#include "matrix/matrix-lib.h"
-#include "util/common-utils.h"
-#include "base/kaldi-error.h"
-
-namespace kaldi {
-/// @addtogroup  feat FeatureExtraction
-/// @{
-
-
-// ComputePowerSpectrum takes a complex FFT (as produced by the FFT
-// functions in matrix/matrix-functions.h) and converts it, in place, into
-// a power spectrum.  If the complex FFT is a vector of size n (representing
-// half the complex FFT of a real signal of size n, as described there),
-// this function computes in the first (n/2) + 1 elements of it, the
-// energies of the fft bins from zero to the Nyquist frequency.  Contents of the
-// remaining (n/2) - 1 elements are undefined at output.
-void ComputePowerSpectrum(VectorBase<BaseFloat> *complex_fft);
-
-
-// Options for delta-feature computation (see class DeltaFeatures).
-struct DeltaFeaturesOptions {
-  int32 order;  // highest delta order to compute; defaults to 2.
-  int32 window;  // e.g. 2; controls window size (window size is 2*window + 1)
-  // the behavior at the edges is to replicate the first or last frame.
-  // this is not configurable.
-
-  DeltaFeaturesOptions(int32 order = 2, int32 window = 2):
-      order(order), window(window) { }
-  // Registers both fields with the given options interface.
-  // NOTE(review): the help text below refers to "delta-window-size", but the
-  // option is registered under the name "delta-window".
-  void Register(OptionsItf *opts) {
-    opts->Register("delta-order", &order, "Order of delta computation");
-    opts->Register("delta-window", &window,
-                   "Parameter controlling window for delta computation (actual window"
-                   " size for each delta order is 1 + 2*delta-window-size)");
-  }
-};
-
-class DeltaFeatures {
- public:
-  // This class provides a low-level function to compute delta features.
-  // The function takes as input a matrix of features and a frame index
-  // that it should compute the deltas on.  It puts its output in an object
-  // of type VectorBase, of size (original-feature-dimension) * (opts.order+1).
-  // This is not the most efficient way to do the computation, but it's
-  // state-free and thus easier to understand
-
-  // Precomputes the scaling window for each delta order from opts.
-  explicit DeltaFeatures(const DeltaFeaturesOptions &opts);
-
-  // Computes the features and deltas for row `frame` of input_feats and
-  // writes them to *output_frame, which must already have dimension
-  // (number of input columns) * (opts.order+1).  Frames beyond either end
-  // of the input are replaced by the first or last frame.
-  void Process(const MatrixBase<BaseFloat> &input_feats,
-               int32 frame,
-               VectorBase<BaseFloat> *output_frame) const;
- private:
-  DeltaFeaturesOptions opts_;
-  std::vector<Vector<BaseFloat> > scales_;  // a scaling window for each
-  // of the orders, including zero: multiply the features for each
-  // dimension by this window.
-};
-
-// Options for shifted-delta feature computation (see class
-// ShiftedDeltaFeatures).  Defaults: window = 1, num_blocks = 7,
-// block_shift = 3.
-struct ShiftedDeltaFeaturesOptions {
-  int32 window,           // The time delay and advance
-        num_blocks,       // Number of delta blocks appended to each frame
-        block_shift;      // Distance between consecutive blocks
-
-  ShiftedDeltaFeaturesOptions():
-      window(1), num_blocks(7), block_shift(3) { }
-  // Registers the three fields with the given options interface.
-  void Register(OptionsItf *opts) {
-    opts->Register("delta-window", &window, "Size of delta advance and delay.");
-    opts->Register("num-blocks", &num_blocks, "Number of delta blocks in advance"
-                   " of each frame to be concatenated");
-    opts->Register("block-shift", &block_shift, "Distance between each block");
-  }
-};
-
-class ShiftedDeltaFeatures {
- public:
-  // This class provides a low-level function to compute shifted
-  // delta cepstra (SDC).
-  // The function takes as input a matrix of features and a frame index
-  // that it should compute the deltas on.  It puts its output in an object
-  // of type SubVector, of size
-  // (original-feature-dimension) * (num_blocks + 1): the original features
-  // followed by num_blocks delta blocks.
-
-  // Precomputes the delta scaling window from opts.window.
-  explicit ShiftedDeltaFeatures(const ShiftedDeltaFeaturesOptions &opts);
-
-  // Computes the shifted deltas for row `frame` of input_feats and writes
-  // them to *output_frame, which must already have the size given above.
-  void Process(const MatrixBase<BaseFloat> &input_feats,
-               int32 frame,
-               SubVector<BaseFloat> *output_frame) const;
- private:
-  ShiftedDeltaFeaturesOptions opts_;
-  Vector<BaseFloat> scales_;  // the delta scaling window, of size
-  // 2*window + 1; the same window is used for every block.
-
-};
-
-// ComputeDeltas is a convenience function that computes deltas on a feature
-// file.  If you want to deal with features coming in bit by bit you would have
-// to use the DeltaFeatures class directly, and do the computation frame by
-// frame.  Later we will have to come up with a nice mechanism to do this for
-// features coming in.
-void ComputeDeltas(const DeltaFeaturesOptions &delta_opts,
-                   const MatrixBase<BaseFloat> &input_features,
-                   Matrix<BaseFloat> *output_features);
-
-// ComputeShiftedDeltas computes deltas from a feature file by applying
-// ShiftedDeltaFeatures over the frames. This function is provided for
-// convenience, however, ShiftedDeltaFeatures can be used directly.
-void ComputeShiftedDeltas(const ShiftedDeltaFeaturesOptions &delta_opts,
-                   const MatrixBase<BaseFloat> &input_features,
-                   Matrix<BaseFloat> *output_features);
-
-// SpliceFrames will normally be used together with LDA.
-// It splices frames together to make a window.  At the
-// start and end of an utterance, it duplicates the first
-// and last frames.
-// Will throw if input features are empty.
-// left_context and right_context must be nonnegative.
-// these both represent a number of frames (e.g. 4, 4 is
-// a good choice).
-void SpliceFrames(const MatrixBase<BaseFloat> &input_features,
-                  int32 left_context,
-                  int32 right_context,
-                  Matrix<BaseFloat> *output_features);
-
-// ReverseFrames reverses the frames in time (used for backwards decoding)
-void ReverseFrames(const MatrixBase<BaseFloat> &input_features,
-                  Matrix<BaseFloat> *output_features);
-
-
-// Resizes *mat_out to n_bases x dimension and fills it with scaled cosine
-// bases: element (i, j) is proportional to cos(pi * i * j / (dimension - 1)),
-// with the first and last columns given half the weight of the interior
-// columns.  The definition divides by (dimension - 1), so dimension must
-// exceed 1 for the result to be meaningful.
-void InitIdftBases(int32 n_bases, int32 dimension, Matrix<BaseFloat> *mat_out);
-
-
-// This is used for speaker-id.  Also see OnlineCmnOptions in ../online2/, which
-// is online CMN with no latency, for online speech recognition.
-// Defaults: cmn_window = 600, min_window = 100, max_warnings = 5,
-// normalize_variance = false, center = false.
-struct SlidingWindowCmnOptions {
-  int32 cmn_window;         // window, in frames, for the running average.
-  int32 min_window;         // minimum window used at the start; only applied
-                            // when center is false.
-  int32 max_warnings;       // maximum number of warnings to report per
-                            // utterance; 0 to disable, -1 to show all.
-  bool normalize_variance;  // if true, also normalize the variance to one.
-  bool center;              // if true, center the window on the current
-                            // frame; if false, the window is to the left.
-
-  SlidingWindowCmnOptions():
-      cmn_window(600),
-      min_window(100),
-      max_warnings(5),
-      normalize_variance(false),
-      center(false) { }
-
-  // Registers all fields with the given options interface.
-  void Register(OptionsItf *opts) {
-    opts->Register("cmn-window", &cmn_window, "Window in frames for running "
-                   "average CMN computation");
-    opts->Register("min-cmn-window", &min_window, "Minimum CMN window "
-                   "used at start of decoding (adds latency only at start). "
-                   "Only applicable if center == false, ignored if center==true");
-    opts->Register("max-warnings", &max_warnings, "Maximum warnings to report "
-                   "per utterance. 0 to disable, -1 to show all.");
-    opts->Register("norm-vars", &normalize_variance, "If true, normalize "
-                   "variance to one."); // naming this as in apply-cmvn.cc
-    opts->Register("center", &center, "If true, use a window centered on the "
-                   "current frame (to the extent possible, modulo end effects). "
-                   "If false, window is to the left.");
-  }
-  // Asserts that the option values are usable; defined in
-  // feature-functions.cc.
-  void Check() const;
-};
-
-
-/// Applies sliding-window cepstral mean and/or variance normalization.  See the
-/// strings registering the options in the options class for information on how
-/// this works and what the options are.  input and output must have the same
-/// dimension.
-void SlidingWindowCmn(const SlidingWindowCmnOptions &opts,
-                      const MatrixBase<BaseFloat> &input,
-                      MatrixBase<BaseFloat> *output);
-
-
-/// @} End of "addtogroup feat"
-}  // namespace kaldi
-
-
-
-#endif  // KALDI_FEAT_FEATURE_FUNCTIONS_H_
--- a/speechx/speechx/kaldi/feat/feature-mfcc.cc
+++ b/speechx/speechx/kaldi/feat/feature-mfcc.cc
@ -1,157 +0,0 @@
-// feat/feature-mfcc.cc
-
-// Copyright 2009-2011  Karel Vesely;  Petr Motlicek
-//                2016  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "feat/feature-mfcc.h"
-
-
-namespace kaldi {
-
-
-// Computes one frame of MFCC features.  *signal_frame is used as workspace and
-// is overwritten (FFT, then power spectrum); the result, of dimension
-// this->Dim(), is written to *feature.
-void MfccComputer::Compute(BaseFloat signal_raw_log_energy,
-                           BaseFloat vtln_warp,
-                           VectorBase<BaseFloat> *signal_frame,
-                           VectorBase<BaseFloat> *feature) {
-  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
-               feature->Dim() == this->Dim());
-
-  // Fetches (creating and caching on first use) the banks for this warp.
-  const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));
-
-  // When energy is wanted but not the "raw" one, recompute it from the frame
-  // that was passed in, before the FFT below overwrites that frame.  The
-  // epsilon floor keeps Log() away from zero.
-  if (opts_.use_energy && !opts_.raw_energy)
-    signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
-                                     std::numeric_limits<float>::epsilon()));
-
-  if (srfft_ != NULL)  // Compute FFT using the split-radix algorithm.
-    srfft_->Compute(signal_frame->Data(), true);
-  else  // An alternative algorithm that works for non-powers-of-two.
-    RealFft(signal_frame, true);
-
-  // Convert the FFT into a power spectrum.
-  ComputePowerSpectrum(signal_frame);
-  // View onto the first Dim()/2 + 1 elements of the workspace (no copy).
-  SubVector<BaseFloat> power_spectrum(*signal_frame, 0,
-                                      signal_frame->Dim() / 2 + 1);
-
-  mel_banks.Compute(power_spectrum, &mel_energies_);
-
-  // avoid log of zero (which should be prevented anyway by dithering).
-  mel_energies_.ApplyFloor(std::numeric_limits<float>::epsilon());
-  mel_energies_.ApplyLog();  // take the log.
-
-  feature->SetZero();  // in case there were NaNs.
-  // feature = dct_matrix_ * mel_energies [which now have log]
-  feature->AddMatVec(1.0, dct_matrix_, kNoTrans, mel_energies_, 0.0);
-
-  if (opts_.cepstral_lifter != 0.0)
-    feature->MulElements(lifter_coeffs_);
-
-  // With use_energy, element 0 is replaced by the log energy, floored at
-  // log_energy_floor_ when an energy floor is configured.
-  if (opts_.use_energy) {
-    if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_)
-      signal_raw_log_energy = log_energy_floor_;
-    (*feature)(0) = signal_raw_log_energy;
-  }
-
-  // HTK ordering: shift elements 1..num_ceps-1 down by one slot and store the
-  // former element 0 (energy or C0) last.
-  if (opts_.htk_compat) {
-    BaseFloat energy = (*feature)(0);
-    for (int32 i = 0; i < opts_.num_ceps - 1; i++)
-      (*feature)(i) = (*feature)(i+1);
-    if (!opts_.use_energy)
-      energy *= M_SQRT2;  // scale on C0 (actually removing a scale
-    // we previously added that's part of one common definition of
-    // the cosine transform.)
-    (*feature)(opts_.num_ceps - 1)  = energy;
-  }
-}
-
-// Builds the truncated DCT matrix, the lifter coefficients, the log energy
-// floor and (for power-of-two padded window sizes) the split-radix FFT, then
-// caches the mel banks for VTLN warp 1.0.  Raises KALDI_ERR if num-ceps
-// exceeds num-mel-bins.
-MfccComputer::MfccComputer(const MfccOptions &opts):
-    opts_(opts),
-    // Give log_energy_floor_ a defined value even when no floor is configured:
-    // the copy constructor reads it unconditionally, and reading an
-    // uninitialized float is undefined behavior.  Compute() only consults it
-    // when opts_.energy_floor > 0.0, so this placeholder never affects output.
-    log_energy_floor_(0.0),
-    srfft_(NULL),
-    mel_energies_(opts.mel_opts.num_bins) {
-
-  int32 num_bins = opts.mel_opts.num_bins;
-  if (opts.num_ceps > num_bins)
-    KALDI_ERR << "num-ceps cannot be larger than num-mel-bins."
-              << " It should be smaller or equal. You provided num-ceps: "
-              << opts.num_ceps << "  and num-mel-bins: "
-              << num_bins;
-
-  Matrix<BaseFloat> dct_matrix(num_bins, num_bins);
-  ComputeDctMatrix(&dct_matrix);
-  // Note that we include zeroth dct in either case.  If using the
-  // energy we replace this with the energy.  This means a different
-  // ordering of features than HTK.
-  SubMatrix<BaseFloat> dct_rows(dct_matrix, 0, opts.num_ceps, 0, num_bins);
-  dct_matrix_.Resize(opts.num_ceps, num_bins);
-  dct_matrix_.CopyFromMat(dct_rows);  // subset of rows.
-  if (opts.cepstral_lifter != 0.0) {
-    lifter_coeffs_.Resize(opts.num_ceps);
-    ComputeLifterCoeffs(opts.cepstral_lifter, &lifter_coeffs_);
-  }
-  if (opts.energy_floor > 0.0)
-    log_energy_floor_ = Log(opts.energy_floor);
-
-  // x & (x - 1) clears the lowest set bit, so it is zero only for powers of
-  // two; only those sizes can use the split-radix FFT.
-  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
-  if ((padded_window_size & (padded_window_size-1)) == 0)  // Is a power of two...
-    srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
-
-  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
-  // [note: this call caches it.]
-  GetMelBanks(1.0);
-}
-
-// Copy constructor.  The map of mel banks is first copied shallowly, then each
-// entry is replaced by its own clone so the two computers never share (and
-// later double-delete) a MelBanks object; the FFT object is cloned likewise.
-MfccComputer::MfccComputer(const MfccComputer &other):
-    opts_(other.opts_),
-    lifter_coeffs_(other.lifter_coeffs_),
-    dct_matrix_(other.dct_matrix_),
-    log_energy_floor_(other.log_energy_floor_),
-    mel_banks_(other.mel_banks_),
-    srfft_(NULL),
-    mel_energies_(other.mel_energies_.Dim(), kUndefined) {
-  typedef std::map<BaseFloat, MelBanks*>::iterator BankIter;
-
-  BankIter bank = mel_banks_.begin();
-  while (bank != mel_banks_.end()) {
-    // bank->second still points at the object owned by "other".
-    bank->second = new MelBanks(*(bank->second));
-    ++bank;
-  }
-
-  if (other.srfft_ != NULL) {
-    srfft_ = new SplitRadixRealFft<BaseFloat>(*(other.srfft_));
-  }
-}
-
-
-
-// Frees every cached MelBanks object and the FFT object (which may be NULL).
-MfccComputer::~MfccComputer() {
-  typedef std::map<BaseFloat, MelBanks*>::iterator BankIter;
-
-  BankIter bank = mel_banks_.begin();
-  while (bank != mel_banks_.end()) {
-    delete bank->second;
-    ++bank;
-  }
-  delete srfft_;
-}
-
-// Returns the mel banks for the given VTLN warp factor, building and caching
-// them on first request.  The returned object is owned by this computer.
-const MelBanks *MfccComputer::GetMelBanks(BaseFloat vtln_warp) {
-  std::map<BaseFloat, MelBanks*>::iterator cached = mel_banks_.find(vtln_warp);
-  if (cached != mel_banks_.end())
-    return cached->second;
-
-  // Not cached yet: build the banks for this warp factor and remember them.
-  MelBanks *fresh_banks = new MelBanks(opts_.mel_opts,
-                                       opts_.frame_opts,
-                                       vtln_warp);
-  mel_banks_[vtln_warp] = fresh_banks;
-  return fresh_banks;
-}
-
-
-
-}  // namespace kaldi
--- a/speechx/speechx/kaldi/feat/feature-mfcc.h
+++ b/speechx/speechx/kaldi/feat/feature-mfcc.h
@ -1,154 +0,0 @@
-// feat/feature-mfcc.h
-
-// Copyright 2009-2011  Karel Vesely;  Petr Motlicek;  Saarland University
-//           2014-2016  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_FEATURE_MFCC_H_
-#define KALDI_FEAT_FEATURE_MFCC_H_
-
-#include <map>
-#include <string>
-
-#include "feat/feature-common.h"
-#include "feat/feature-functions.h"
-#include "feat/feature-window.h"
-#include "feat/mel-computations.h"
-
-namespace kaldi {
-/// @addtogroup  feat FeatureExtraction
-/// @{
-
-
-/// MfccOptions contains basic options for computing MFCC features.
-/// Register() exposes each field below, plus the fields of frame_opts and
-/// mel_opts, to the option parser.
-struct MfccOptions {
-  FrameExtractionOptions frame_opts;  // registered via frame_opts.Register().
-  MelBanksOptions mel_opts;  // constructed with 23 mel bins by default.
-  int32 num_ceps;  // e.g. 13: num cepstral coeffs, counting zero.
-  bool use_energy;  // use energy; else C0
-  BaseFloat energy_floor;  // 0 by default; set to a value like 1.0 or 0.1 if
-                           // you disable dithering.
-  bool raw_energy;  // If true, compute energy before preemphasis and windowing
-  BaseFloat cepstral_lifter;  // Scaling factor on cepstra for HTK compatibility.
-                              // if 0.0, no liftering is done.
-  bool htk_compat;  // if true, put energy/C0 last and introduce a factor of
-                    // sqrt(2) on C0 to be the same as HTK.
-
-  // Sets the defaults: 23 mel bins, 13 cepstra, raw energy used as the first
-  // feature, no energy floor, lifter 22.0, no HTK reordering.
-  MfccOptions() : mel_opts(23),
-                  // defaults the #mel-banks to 23 for the MFCC computations.
-                  // this seems to be common for 16khz-sampled data,
-                  // but for 8khz-sampled data, 15 may be better.
-                  num_ceps(13),
-                  use_energy(true),
-                  energy_floor(0.0),
-                  raw_energy(true),
-                  cepstral_lifter(22.0),
-                  htk_compat(false) {}
-
-  // Registers the framing and mel-bank options first, then the MFCC-specific
-  // ones under their command-line names.
-  void Register(OptionsItf *opts) {
-    frame_opts.Register(opts);
-    mel_opts.Register(opts);
-    opts->Register("num-ceps", &num_ceps,
-                   "Number of cepstra in MFCC computation (including C0)");
-    opts->Register("use-energy", &use_energy,
-                   "Use energy (not C0) in MFCC computation");
-    opts->Register("energy-floor", &energy_floor,
-                   "Floor on energy (absolute, not relative) in MFCC computation. "
-                   "Only makes a difference if --use-energy=true; only necessary if "
-                   "--dither=0.0.  Suggested values: 0.1 or 1.0");
-    opts->Register("raw-energy", &raw_energy,
-                   "If true, compute energy before preemphasis and windowing");
-    opts->Register("cepstral-lifter", &cepstral_lifter,
-                   "Constant that controls scaling of MFCCs");
-    opts->Register("htk-compat", &htk_compat,
-                   "If true, put energy or C0 last and use a factor of sqrt(2) on "
-                   "C0.  Warning: not sufficient to get HTK compatible features "
-                   "(need to change other parameters).");
-  }
-};
-
-
-
-// This is the new-style interface to the MFCC computation.  One instance
-// holds the options, the precomputed DCT matrix and lifter coefficients, and
-// a per-VTLN-warp cache of mel banks; Compute() turns one frame of signal
-// into one feature vector.
-class MfccComputer {
- public:
-  typedef MfccOptions Options;
-  explicit MfccComputer(const MfccOptions &opts);
-  // Copying clones the owned MelBanks and FFT objects rather than sharing them.
-  MfccComputer(const MfccComputer &other);
-
-  const FrameExtractionOptions &GetFrameOptions() const {
-    return opts_.frame_opts;
-  }
-
-  // Dimension of the feature vector written by Compute().
-  int32 Dim() const { return opts_.num_ceps; }
-
-  // True when Compute() uses the signal_raw_log_energy supplied by the caller.
-  bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
-
-  /**
-     Function that computes one frame of features from
-     one frame of signal.
-
-     @param [in] signal_raw_log_energy The log-energy of the frame of the signal
-         prior to windowing and pre-emphasis, or
-         log(numeric_limits<float>::min()), whichever is greater.  Must be
-         ignored by this function if this class returns false from
-         this->NeedRawLogEnergy().
-     @param [in] vtln_warp  The VTLN warping factor that the user wants
-         to be applied when computing features for this utterance.  Will
-         normally be 1.0, meaning no warping is to be done.  The value will
-         be ignored for feature types that don't support VTLN, such as
-         spectrogram features.
-     @param [in] signal_frame  One frame of the signal,
-       as extracted using the function ExtractWindow() using the options
-       returned by this->GetFrameOptions().  The function will use the
-       vector as a workspace, which is why it's a non-const pointer.
-     @param [out] feature  Pointer to a vector of size this->Dim(), to which
-         the computed feature will be written.
-  */
-  void Compute(BaseFloat signal_raw_log_energy,
-               BaseFloat vtln_warp,
-               VectorBase<BaseFloat> *signal_frame,
-               VectorBase<BaseFloat> *feature);
-
-  ~MfccComputer();
- private:
-  // disallow assignment.
-  MfccComputer &operator = (const MfccComputer &in);
-
- protected:
-  // Returns the cached banks for this warp factor, creating them if needed.
-  const MelBanks *GetMelBanks(BaseFloat vtln_warp);
-
-  MfccOptions opts_;
-  Vector<BaseFloat> lifter_coeffs_;  // filled only if cepstral_lifter != 0.0.
-  Matrix<BaseFloat> dct_matrix_;  // matrix we left-multiply by to perform DCT.
-  BaseFloat log_energy_floor_;  // Log(energy_floor); used if energy_floor > 0.
-  std::map<BaseFloat, MelBanks*> mel_banks_;  // BaseFloat is VTLN coefficient.
-  SplitRadixRealFft<BaseFloat> *srfft_;  // NULL unless window size is 2^n.
-
-  // note: mel_energies_ is specific to the frame we're processing, it's
-  // just a temporary workspace.
-  Vector<BaseFloat> mel_energies_;
-};
-
-typedef OfflineFeatureTpl<MfccComputer> Mfcc;
-
-
-/// @} End of "addtogroup feat"
-}  // namespace kaldi
-
-
-#endif  // KALDI_FEAT_FEATURE_MFCC_H_
--- a/speechx/speechx/kaldi/feat/feature-plp.cc
+++ b/speechx/speechx/kaldi/feat/feature-plp.cc
@ -1,191 +0,0 @@
-// feat/feature-plp.cc
-
-// Copyright 2009-2011  Petr Motlicek;  Karel Vesely
-//                2016  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "feat/feature-plp.h"
-
-namespace kaldi {
-
-// Sizes the per-frame workspaces, builds the lifter coefficients and IDFT
-// bases, sets the log energy floor and (for power-of-two padded window sizes)
-// the split-radix FFT, then caches the mel banks for VTLN warp 1.0.
-PlpComputer::PlpComputer(const PlpOptions &opts):
-    opts_(opts),
-    // Give log_energy_floor_ a defined value even when no floor is configured:
-    // the copy constructor reads it unconditionally, and reading an
-    // uninitialized float is undefined behavior.  Compute() only consults it
-    // when opts_.energy_floor > 0.0, so this placeholder never affects output.
-    log_energy_floor_(0.0),
-    srfft_(NULL),
-    mel_energies_duplicated_(opts_.mel_opts.num_bins + 2, kUndefined),
-    autocorr_coeffs_(opts_.lpc_order + 1, kUndefined),
-    lpc_coeffs_(opts_.lpc_order, kUndefined),
-    raw_cepstrum_(opts_.lpc_order, kUndefined) {
-
-  if (opts.cepstral_lifter != 0.0) {
-    lifter_coeffs_.Resize(opts.num_ceps);
-    ComputeLifterCoeffs(opts.cepstral_lifter, &lifter_coeffs_);
-  }
-  InitIdftBases(opts_.lpc_order + 1, opts_.mel_opts.num_bins + 2,
-                &idft_bases_);
-
-  if (opts.energy_floor > 0.0)
-    log_energy_floor_ = Log(opts.energy_floor);
-
-  // x & (x - 1) clears the lowest set bit, so it is zero only for powers of
-  // two; only those sizes can use the split-radix FFT.
-  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
-  if ((padded_window_size & (padded_window_size-1)) == 0)  // Is a power of two...
-    srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
-
-  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
-  // [note: this call caches it.]
-  GetMelBanks(1.0);
-}
-
-// Copy constructor.  Both caches are first copied shallowly, then every entry
-// is replaced by its own clone so the two computers never share (and later
-// double-delete) an object; the FFT object is cloned likewise.  The per-frame
-// workspaces are only sized, not copied.
-PlpComputer::PlpComputer(const PlpComputer &other):
-    opts_(other.opts_),
-    lifter_coeffs_(other.lifter_coeffs_),
-    idft_bases_(other.idft_bases_),
-    log_energy_floor_(other.log_energy_floor_),
-    mel_banks_(other.mel_banks_),
-    equal_loudness_(other.equal_loudness_),
-    srfft_(NULL),
-    mel_energies_duplicated_(opts_.mel_opts.num_bins + 2, kUndefined),
-    autocorr_coeffs_(opts_.lpc_order + 1, kUndefined),
-    lpc_coeffs_(opts_.lpc_order, kUndefined),
-    raw_cepstrum_(opts_.lpc_order, kUndefined) {
-  typedef std::map<BaseFloat, MelBanks*>::iterator BankIter;
-  typedef std::map<BaseFloat, Vector<BaseFloat>*>::iterator LoudnessIter;
-
-  BankIter bank = mel_banks_.begin();
-  while (bank != mel_banks_.end()) {
-    // bank->second still points at the object owned by "other".
-    bank->second = new MelBanks(*(bank->second));
-    ++bank;
-  }
-
-  LoudnessIter loudness = equal_loudness_.begin();
-  while (loudness != equal_loudness_.end()) {
-    loudness->second = new Vector<BaseFloat>(*(loudness->second));
-    ++loudness;
-  }
-
-  if (other.srfft_ != NULL) {
-    srfft_ = new SplitRadixRealFft<BaseFloat>(*(other.srfft_));
-  }
-}
-
-// Frees every cached MelBanks object and equal-loudness vector, then the FFT
-// object (which may be NULL).
-PlpComputer::~PlpComputer() {
-  typedef std::map<BaseFloat, MelBanks*>::iterator BankIter;
-  typedef std::map<BaseFloat, Vector<BaseFloat>* >::iterator LoudnessIter;
-
-  BankIter bank = mel_banks_.begin();
-  while (bank != mel_banks_.end()) {
-    delete bank->second;
-    ++bank;
-  }
-
-  LoudnessIter loudness = equal_loudness_.begin();
-  while (loudness != equal_loudness_.end()) {
-    delete loudness->second;
-    ++loudness;
-  }
-
-  delete srfft_;
-}
-
-// Returns the mel banks for the given VTLN warp factor, building and caching
-// them on first request.  The returned object is owned by this computer.
-const MelBanks *PlpComputer::GetMelBanks(BaseFloat vtln_warp) {
-  std::map<BaseFloat, MelBanks*>::iterator cached = mel_banks_.find(vtln_warp);
-  if (cached != mel_banks_.end())
-    return cached->second;
-
-  // Not cached yet: build the banks for this warp factor and remember them.
-  MelBanks *fresh_banks = new MelBanks(opts_.mel_opts,
-                                       opts_.frame_opts,
-                                       vtln_warp);
-  mel_banks_[vtln_warp] = fresh_banks;
-  return fresh_banks;
-}
-
-// Returns the equal-loudness vector for the given VTLN warp factor, computing
-// and caching it on first request.  The mel banks for that warp are always
-// looked up first, which also creates and caches them if necessary.
-const Vector<BaseFloat> *PlpComputer::GetEqualLoudness(BaseFloat vtln_warp) {
-  const MelBanks *banks_for_warp = GetMelBanks(vtln_warp);
-
-  std::map<BaseFloat, Vector<BaseFloat>*>::iterator cached =
-      equal_loudness_.find(vtln_warp);
-  if (cached != equal_loudness_.end())
-    return cached->second;
-
-  // Not cached yet: derive the vector from the banks and remember it.
-  Vector<BaseFloat> *fresh_vector = new Vector<BaseFloat>;
-  GetEqualLoudnessVector(*banks_for_warp, fresh_vector);
-  equal_loudness_[vtln_warp] = fresh_vector;
-  return fresh_vector;
-}
-
-// Computes one frame of PLP features.  *signal_frame is used as workspace and
-// is overwritten (FFT, then power spectrum); the result, of dimension
-// this->Dim(), is written to *feature.
-void PlpComputer::Compute(BaseFloat signal_raw_log_energy,
-                          BaseFloat vtln_warp,
-                          VectorBase<BaseFloat> *signal_frame,
-                          VectorBase<BaseFloat> *feature) {
-  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
-               feature->Dim() == this->Dim());
-
-  // Both lookups create and cache their result on first use for this warp.
-  const MelBanks &mel_banks = *GetMelBanks(vtln_warp);
-  const Vector<BaseFloat> &equal_loudness = *GetEqualLoudness(vtln_warp);
-
-
-  KALDI_ASSERT(opts_.num_ceps <= opts_.lpc_order+1);  // our num-ceps includes C0.
-
-
-  // When energy is wanted but not the "raw" one, recompute it from the frame
-  // that was passed in, before the FFT below overwrites that frame.
-  if (opts_.use_energy && !opts_.raw_energy)
-    signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
-                                     std::numeric_limits<float>::min()));
-
-  if (srfft_ != NULL)  // Compute FFT using split-radix algorithm.
-    srfft_->Compute(signal_frame->Data(), true);
-  else  // An alternative algorithm that works for non-powers-of-two.
-    RealFft(signal_frame, true);
-
-  // Convert the FFT into a power spectrum.
-  ComputePowerSpectrum(signal_frame);  // elements 0 ... signal_frame->Dim()/2
-
-  SubVector<BaseFloat> power_spectrum(*signal_frame,
-                                      0, signal_frame->Dim() / 2 + 1);
-
-  int32 num_mel_bins = opts_.mel_opts.num_bins;
-
-  // View onto elements 1..num_mel_bins of the workspace, leaving one slot free
-  // at each end for the duplicated edge values written below.
-  SubVector<BaseFloat> mel_energies(mel_energies_duplicated_, 1, num_mel_bins);
-
-  mel_banks.Compute(power_spectrum, &mel_energies);
-
-  mel_energies.MulElements(equal_loudness);
-
-  mel_energies.ApplyPow(opts_.compress_factor);
-
-  // duplicate first and last elements
-  mel_energies_duplicated_(0) = mel_energies_duplicated_(1);
-  mel_energies_duplicated_(num_mel_bins + 1) =
-      mel_energies_duplicated_(num_mel_bins);
-
-  autocorr_coeffs_.SetZero();  // In case of NaNs or infs
-  autocorr_coeffs_.AddMatVec(1.0, idft_bases_, kNoTrans,
-                             mel_energies_duplicated_,  0.0);
-
-  BaseFloat residual_log_energy = ComputeLpc(autocorr_coeffs_, &lpc_coeffs_);
-
-  // NOTE(review): numeric_limits<float>::min() is the smallest positive
-  // normalized float, so this clamps any negative value to roughly zero.  If
-  // ComputeLpc() really returns a log-domain energy, that looks unintended --
-  // confirm before relying on element 0 when use_energy is false.
-  residual_log_energy = std::max<BaseFloat>(residual_log_energy,
-                                 std::numeric_limits<float>::min());
-
-  // Element 0 takes the residual log energy; elements 1..num_ceps-1 take the
-  // first num_ceps-1 cepstral coefficients derived from the LPC coefficients.
-  Lpc2Cepstrum(opts_.lpc_order, lpc_coeffs_.Data(), raw_cepstrum_.Data());
-  feature->Range(1, opts_.num_ceps - 1).CopyFromVec(
-      raw_cepstrum_.Range(0, opts_.num_ceps - 1));
-  (*feature)(0) = residual_log_energy;
-
-  if (opts_.cepstral_lifter != 0.0)
-    feature->MulElements(lifter_coeffs_);
-
-  if (opts_.cepstral_scale != 1.0)
-    feature->Scale(opts_.cepstral_scale);
-
-  // With use_energy, element 0 is replaced by the signal log energy, floored
-  // at log_energy_floor_ when an energy floor is configured.
-  if (opts_.use_energy) {
-    if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_)
-      signal_raw_log_energy = log_energy_floor_;
-    (*feature)(0) = signal_raw_log_energy;
-  }
-
-  if (opts_.htk_compat) {  // reorder the features.
-    // Shift elements 1..num_ceps-1 down by one slot and store the former
-    // element 0 last; no extra scaling is applied here.
-    BaseFloat log_energy = (*feature)(0);
-    for (int32 i = 0; i < opts_.num_ceps-1; i++)
-      (*feature)(i) = (*feature)(i+1);
-    (*feature)(opts_.num_ceps-1)  = log_energy;
-  }
-}
-
-
-}  // namespace kaldi
--- a/speechx/speechx/kaldi/feat/feature-plp.h
+++ b/speechx/speechx/kaldi/feat/feature-plp.h
@ -1,176 +0,0 @@
-// feat/feature-plp.h
-
-// Copyright 2009-2011  Petr Motlicek;  Karel Vesely
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_FEATURE_PLP_H_
-#define KALDI_FEAT_FEATURE_PLP_H_
-
-#include <map>
-#include <string>
-
-#include "feat/feature-common.h"
-#include "feat/feature-functions.h"
-#include "feat/feature-window.h"
-#include "feat/mel-computations.h"
-#include "util/options-itf.h"
-
-namespace kaldi {
-/// @addtogroup  feat FeatureExtraction
-/// @{
-
-
-
-/// PlpOptions contains basic options for computing PLP features.
-/// It only includes things that can be done in a "stateless" way, i.e.
-/// it does not include energy max-normalization.
-/// It does not include delta computation.
-/// Register() exposes each field below, plus the fields of frame_opts and
-/// mel_opts, to the option parser.
-struct PlpOptions {
-  FrameExtractionOptions frame_opts;  // registered via frame_opts.Register().
-  MelBanksOptions mel_opts;  // constructed with 23 mel bins by default.
-  int32 lpc_order;  // order of LPC analysis (default 12).
-  int32 num_ceps;  // num cepstra including zero
-  bool use_energy;  // use energy; else C0
-  BaseFloat energy_floor;  // floor on energy; only applied when > 0.0.
-  bool raw_energy;  // If true, compute energy before preemphasis and windowing
-  BaseFloat compress_factor;  // exponent applied to the mel energies.
-  int32 cepstral_lifter;  // liftering constant; 0 disables liftering.
-  BaseFloat cepstral_scale;  // scale applied to the features when != 1.0.
-
-  bool htk_compat;  // if true, put energy/C0 last.  PlpComputer::Compute()
-                    // only reorders; it applies no sqrt(2) factor to C0.
-
-  PlpOptions() : mel_opts(23),
-                 // default number of mel-banks for the PLP computation; this
-                 // seems to be common for 16kHz-sampled data. For 8kHz-sampled
-                 // data, 15 may be better.
-                 lpc_order(12),
-                 num_ceps(13),
-                 use_energy(true),
-                 energy_floor(0.0),
-                 raw_energy(true),
-                 compress_factor(0.33333),
-                 cepstral_lifter(22),
-                 cepstral_scale(1.0),
-                 htk_compat(false) {}
-
-  // Registers the framing and mel-bank options first, then the PLP-specific
-  // ones under their command-line names.
-  void Register(OptionsItf *opts) {
-    frame_opts.Register(opts);
-    mel_opts.Register(opts);
-    opts->Register("lpc-order", &lpc_order,
-                   "Order of LPC analysis in PLP computation");
-    opts->Register("num-ceps", &num_ceps,
-                   "Number of cepstra in PLP computation (including C0)");
-    opts->Register("use-energy", &use_energy,
-                   "Use energy (not C0) for zeroth PLP feature");
-    opts->Register("energy-floor", &energy_floor,
-                   "Floor on energy (absolute, not relative) in PLP computation. "
-                   "Only makes a difference if --use-energy=true; only necessary if "
-                   "--dither=0.0.  Suggested values: 0.1 or 1.0");
-    opts->Register("raw-energy", &raw_energy,
-                   "If true, compute energy before preemphasis and windowing");
-    opts->Register("compress-factor", &compress_factor,
-                   "Compression factor in PLP computation");
-    opts->Register("cepstral-lifter", &cepstral_lifter,
-                   "Constant that controls scaling of PLPs");
-    opts->Register("cepstral-scale", &cepstral_scale,
-                   "Scaling constant in PLP computation");
-    opts->Register("htk-compat", &htk_compat,
-                   "If true, put energy or C0 last.  Warning: not sufficient "
-                   "to get HTK compatible features (need to change other "
-                   "parameters).");
-  }
-};
-
-
-/// This is the new-style interface to the PLP computation.  One instance
-/// holds the options, the precomputed lifter coefficients and IDFT bases, and
-/// per-VTLN-warp caches of mel banks and equal-loudness vectors; Compute()
-/// turns one frame of signal into one feature vector.
-class PlpComputer {
- public:
-  typedef PlpOptions Options;
-  explicit PlpComputer(const PlpOptions &opts);
-  // Copying clones the owned cache entries and FFT object rather than sharing
-  // them.
-  PlpComputer(const PlpComputer &other);
-
-  const FrameExtractionOptions &GetFrameOptions() const {
-    return opts_.frame_opts;
-  }
-
-  // Dimension of the feature vector written by Compute().
-  int32 Dim() const { return opts_.num_ceps; }
-
-  // True when Compute() uses the signal_raw_log_energy supplied by the caller.
-  bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
-
-  /**
-     Function that computes one frame of features from
-     one frame of signal.
-
-     @param [in] signal_raw_log_energy The log-energy of the frame of the signal
-         prior to windowing and pre-emphasis, or
-         log(numeric_limits<float>::min()), whichever is greater.  Must be
-         ignored by this function if this class returns false from
-         this->NeedRawLogEnergy().
-     @param [in] vtln_warp  The VTLN warping factor that the user wants
-         to be applied when computing features for this utterance.  Will
-         normally be 1.0, meaning no warping is to be done.  The value will
-         be ignored for feature types that don't support VTLN, such as
-         spectrogram features.
-     @param [in] signal_frame  One frame of the signal,
-       as extracted using the function ExtractWindow() using the options
-       returned by this->GetFrameOptions().  The function will use the
-       vector as a workspace, which is why it's a non-const pointer.
-     @param [out] feature  Pointer to a vector of size this->Dim(), to which
-         the computed feature will be written.
-  */
-  void Compute(BaseFloat signal_raw_log_energy,
-               BaseFloat vtln_warp,
-               VectorBase<BaseFloat> *signal_frame,
-               VectorBase<BaseFloat> *feature);
-
-  ~PlpComputer();
- private:
-
-  // Returns the cached banks for this warp factor, creating them if needed.
-  const MelBanks *GetMelBanks(BaseFloat vtln_warp);
-
-  // Returns the cached equal-loudness vector for this warp factor, creating it
-  // (and the corresponding mel banks) if needed.
-  const Vector<BaseFloat> *GetEqualLoudness(BaseFloat vtln_warp);
-
-  PlpOptions opts_;
-  Vector<BaseFloat> lifter_coeffs_;  // filled only if cepstral_lifter != 0.0.
-  Matrix<BaseFloat> idft_bases_;
-  BaseFloat log_energy_floor_;  // Log(energy_floor); used if energy_floor > 0.
-  std::map<BaseFloat, MelBanks*> mel_banks_;  // BaseFloat is VTLN coefficient.
-  std::map<BaseFloat, Vector<BaseFloat>* > equal_loudness_;  // same key.
-  SplitRadixRealFft<BaseFloat> *srfft_;  // NULL unless window size is 2^n.
-
-  // temporary vector used inside Compute; size is opts_.mel_opts.num_bins + 2
-  Vector<BaseFloat> mel_energies_duplicated_;
-  // temporary vector used inside Compute; size is opts_.lpc_order + 1
-  Vector<BaseFloat> autocorr_coeffs_;
-  // temporary vector used inside Compute; size is opts_.lpc_order
-  Vector<BaseFloat> lpc_coeffs_;
-  // temporary vector used inside Compute; size is opts_.lpc_order
-  Vector<BaseFloat> raw_cepstrum_;
-
-  // Disallow assignment.
-  PlpComputer &operator =(const PlpComputer &other);
-};
-
-typedef OfflineFeatureTpl<PlpComputer> Plp;
-
-/// @} End of "addtogroup feat"
-
-}  // namespace kaldi
-
-
-#endif  // KALDI_FEAT_FEATURE_PLP_H_
--- a/speechx/speechx/kaldi/feat/feature-spectrogram.cc
+++ b/speechx/speechx/kaldi/feat/feature-spectrogram.cc
@ -1,82 +0,0 @@
-// feat/feature-spectrogram.cc
-
-// Copyright 2009-2012  Karel Vesely
-// Copyright 2012  Navdeep Jaitly
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "feat/feature-spectrogram.h"
-
-
-namespace kaldi {
-
-// Sets the log energy floor and, for power-of-two padded window sizes, the
-// split-radix FFT used by Compute().
-SpectrogramComputer::SpectrogramComputer(const SpectrogramOptions &opts)
-    : opts_(opts),
-      // Give log_energy_floor_ a defined value even when no floor is
-      // configured: the copy constructor reads it unconditionally, and reading
-      // an uninitialized float is undefined behavior.  Compute() only consults
-      // it when opts_.energy_floor > 0.0, so this placeholder never affects
-      // output.
-      log_energy_floor_(0.0),
-      srfft_(NULL) {
-  if (opts.energy_floor > 0.0)
-    log_energy_floor_ = Log(opts.energy_floor);
-
-  // x & (x - 1) clears the lowest set bit, so it is zero only for powers of
-  // two; only those sizes can use the split-radix FFT.
-  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
-  if ((padded_window_size & (padded_window_size-1)) == 0)  // Is a power of two
-    srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
-}
-
-// Copy constructor: copies the options and the log energy floor, and clones
-// the FFT object (when there is one) so the two computers never share it.
-SpectrogramComputer::SpectrogramComputer(const SpectrogramComputer &other)
-    : opts_(other.opts_),
-      log_energy_floor_(other.log_energy_floor_),
-      srfft_(NULL) {
-  if (other.srfft_ == NULL)
-    return;  // "other" has no split-radix FFT, so neither does the copy.
-  srfft_ = new SplitRadixRealFft<BaseFloat>(*other.srfft_);
-}
-
-// Releases the split-radix FFT object, if one was allocated.
-SpectrogramComputer::~SpectrogramComputer() {
-  delete srfft_;  // srfft_ may be NULL; deleting a null pointer is a no-op.
-}
-
-// Computes one frame of log power-spectrum features.  *signal_frame is used as
-// workspace and is overwritten (FFT, then power spectrum); the result, of
-// dimension this->Dim(), is written to *feature.  vtln_warp is not used.
-void SpectrogramComputer::Compute(BaseFloat signal_raw_log_energy,
-                                  BaseFloat vtln_warp,
-                                  VectorBase<BaseFloat> *signal_frame,
-                                  VectorBase<BaseFloat> *feature) {
-  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
-               feature->Dim() == this->Dim());
-
-
-  // Compute energy after window function (not the raw one)
-  // This must happen before the FFT below overwrites the frame; the epsilon
-  // floor keeps Log() away from zero.
-  if (!opts_.raw_energy)
-    signal_raw_log_energy = Log(std::max<BaseFloat>(VecVec(*signal_frame, *signal_frame),
-                                     std::numeric_limits<float>::epsilon()));
-
-  if (srfft_ != NULL)  // Compute FFT using split-radix algorithm.
-    srfft_->Compute(signal_frame->Data(), true);
-  else  // An alternative algorithm that works for non-powers-of-two
-    RealFft(signal_frame, true);
-
-  // Convert the FFT into a power spectrum.
-  ComputePowerSpectrum(signal_frame);
-  // View onto the first Dim()/2 + 1 elements of the workspace (no copy).
-  SubVector<BaseFloat> power_spectrum(*signal_frame,
-                                      0, signal_frame->Dim() / 2 + 1);
-
-  // Floor before taking the log so that zero bins do not produce -inf.
-  power_spectrum.ApplyFloor(std::numeric_limits<float>::epsilon());
-  power_spectrum.ApplyLog();
-
-  feature->CopyFromVec(power_spectrum);
-
-  if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_)
-    signal_raw_log_energy = log_energy_floor_;
-  // The zeroth spectrogram component is always set to the signal energy,
-  // instead of the square of the constant component of the signal.
-  (*feature)(0) = signal_raw_log_energy;
-}
-
-}  // namespace kaldi
--- a/speechx/speechx/kaldi/feat/feature-spectrogram.h
+++ b/speechx/speechx/kaldi/feat/feature-spectrogram.h
@ -1,117 +0,0 @@
-// feat/feature-spectrogram.h
-
-// Copyright 2009-2012  Karel Vesely
-// Copyright 2012  Navdeep Jaitly
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_FEATURE_SPECTROGRAM_H_
-#define KALDI_FEAT_FEATURE_SPECTROGRAM_H_
-
-
-#include <string>
-
-#include "feat/feature-common.h"
-#include "feat/feature-functions.h"
-#include "feat/feature-window.h"
-
-namespace kaldi {
-/// @addtogroup  feat FeatureExtraction
-/// @{
-
-
-/// Configuration for spectrogram extraction: the framing options plus the
-/// treatment of the energy term stored in element 0 of each feature vector.
-struct SpectrogramOptions {
-  FrameExtractionOptions frame_opts;  // framing and windowing configuration
-  BaseFloat energy_floor;  // absolute floor on the energy; defaults to 0.0
-  bool raw_energy;  // If true, compute energy before preemphasis and windowing
-
-  SpectrogramOptions() : energy_floor(0.0), raw_energy(true) {}
-
-  /// Registers the frame-extraction options first, then "energy-floor" and
-  /// "raw-energy", with the given options interface.
-  void Register(OptionsItf *opts) {
-    frame_opts.Register(opts);
-    opts->Register(
-        "energy-floor", &energy_floor,
-        "Floor on energy (absolute, not relative) in Spectrogram "
-        "computation.  Caution: this floor is applied to the zeroth "
-        "component, representing the total signal energy.  The "
-        "floor on the individual spectrogram elements is fixed at "
-        "std::numeric_limits<float>::epsilon().");
-    opts->Register(
-        "raw-energy", &raw_energy,
-        "If true, compute energy before preemphasis and windowing");
-  }
-};
-
-/// Class for computing spectrogram features: one call to Compute() turns one
-/// windowed frame of signal into Dim() log power-spectrum values.
-class SpectrogramComputer {
- public:
-  typedef SpectrogramOptions Options;
-  explicit SpectrogramComputer(const SpectrogramOptions &opts);
-  /// Copy constructor; the FFT helper (if any) is deep-copied.
-  SpectrogramComputer(const SpectrogramComputer &other);
-
-  /// Returns the frame-extraction options this computer was configured with.
-  const FrameExtractionOptions& GetFrameOptions() const {
-    return opts_.frame_opts;
-  }
-
-  /// Feature dimension: half the padded window size, plus one.
-  int32 Dim() const { return opts_.frame_opts.PaddedWindowSize() / 2 + 1; }
-
-  /// True if Compute() uses the signal_raw_log_energy argument as given.
-  bool NeedRawLogEnergy() const { return opts_.raw_energy; }
-
-
-  /**
-     Function that computes one frame of spectrogram features from
-     one frame of signal.
-
-     @param [in] signal_raw_log_energy The log-energy of the frame of the signal
-         prior to windowing and pre-emphasis, or
-         log(numeric_limits<float>::min()), whichever is greater.  Must be
-         ignored by this function if this class returns false from
-         this->NeedRawLogEnergy().
-     @param [in] vtln_warp  This is ignored by this function, it's only
-         needed for interface compatibility.
-     @param [in] signal_frame  One frame of the signal,
-       as extracted using the function ExtractWindow() using the options
-       returned by this->GetFrameOptions().  The function will use the
-       vector as a workspace, which is why it's a non-const pointer.
-     @param [out] feature  Pointer to a vector of size this->Dim(), to which
-         the computed feature will be written.
-  */
-  void Compute(BaseFloat signal_raw_log_energy,
-               BaseFloat vtln_warp,
-               VectorBase<BaseFloat> *signal_frame,
-               VectorBase<BaseFloat> *feature);
-
-  ~SpectrogramComputer();
-
- private:
-  SpectrogramOptions opts_;
-  // Compared against the frame log-energy in Compute() when
-  // opts_.energy_floor > 0.
-  BaseFloat log_energy_floor_;
-  // Split-radix FFT helper; deleted in the destructor.  May be NULL, in which
-  // case Compute() falls back to RealFft().
-  SplitRadixRealFft<BaseFloat> *srfft_;
-
-  // Disallow assignment.
-  SpectrogramComputer &operator=(const SpectrogramComputer &other);
-};
-
-typedef OfflineFeatureTpl<SpectrogramComputer> Spectrogram;
-
-
-/// @} End of "addtogroup feat"
-}  // namespace kaldi
-
-
-#endif  // KALDI_FEAT_FEATURE_SPECTROGRAM_H_
--- a/speechx/speechx/kaldi/feat/feature-window.cc
+++ b/speechx/speechx/kaldi/feat/feature-window.cc
@ -1,222 +0,0 @@
-// feat/feature-window.cc
-
-// Copyright 2009-2011  Karel Vesely;  Petr Motlicek;  Microsoft Corporation
-//           2013-2016  Johns Hopkins University (author: Daniel Povey)
-//                2014  IMSL, PKU-HKUST (author: Wei Shi)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "feat/feature-window.h"
-#include "matrix/matrix-functions.h"
-
-
-namespace kaldi {
-
-
-// Returns the index of the first sample of frame 'frame'.  With snip_edges,
-// frames start at multiples of the window shift; otherwise each frame is
-// centred on the midpoint of its shift interval and the start is obtained by
-// stepping back half a window from there.
-int64 FirstSampleOfFrame(int32 frame,
-                         const FrameExtractionOptions &opts) {
-  const int64 shift = opts.WindowShift();
-  if (opts.snip_edges) return frame * shift;
-
-  const int64 frame_midpoint = shift * frame  +  shift / 2;
-  return frame_midpoint  -  opts.WindowSize() / 2;
-}
-
-// Number of frames that can be extracted from 'num_samples' samples.
-int32 NumFrames(int64 num_samples,
-                const FrameExtractionOptions &opts,
-                bool flush) {
-  const int64 shift = opts.WindowShift();
-  const int64 length = opts.WindowSize();
-
-  if (opts.snip_edges) {
-    // --snip-edges=true (the default) follows an HTK-like rule: every frame
-    // must fit completely inside the waveform and the first one starts at
-    // sample zero.  'num_samples - length' is the room available for shifting
-    // the frame, 'shift' is the step, and the integer quotient (rounded down)
-    // is the number of shifts that fit.
-    return num_samples < length ? 0 : 1 + (num_samples - length) / shift;
-  }
-
-  // --snip-edges=false: the count is (file-length / frame-shift) rounded to
-  // the nearest integer, so that it is a simple, predictable function of the
-  // shift and the signal length.  Integer division in C++ rounds toward zero,
-  // hence half a shift is added before dividing.
-  int32 frame_count = (num_samples + (shift / 2)) / shift;
-  if (flush) return frame_count;
-
-  // Without flush we may not emit frames reaching past the end of the signal.
-  // 'end' means one past the last sample.  Written for clarity, not speed.
-  int64 last_frame_end = FirstSampleOfFrame(frame_count - 1, opts) + length;
-  for (; frame_count > 0 && last_frame_end > num_samples; --frame_count)
-    last_frame_end -= shift;
-  return frame_count;
-}
-
-
-// Adds RandGauss() * dither_value to every sample of 'waveform', in place.
-// A dither_value of 0.0 returns immediately without touching the data.
-void Dither(VectorBase<BaseFloat> *waveform, BaseFloat dither_value) {
-  if (dither_value == 0.0)
-    return;
-  const int32 num_samples = waveform->Dim();
-  BaseFloat *samples = waveform->Data();
-  RandomState rstate;
-  for (int32 n = 0; n != num_samples; ++n) {
-    samples[n] += RandGauss(&rstate) * dither_value;
-  }
-}
-
-
-// Applies first-order preemphasis to 'waveform' in place:
-//   x(i) <- x(i) - preemph_coeff * x(i-1)   for i >= 1
-//   x(0) <- x(0) - preemph_coeff * x(0)
-// Does nothing when preemph_coeff == 0.0 or when the waveform is empty.
-// preemph_coeff must lie in [0, 1] (checked with KALDI_ASSERT).
-void Preemphasize(VectorBase<BaseFloat> *waveform, BaseFloat preemph_coeff) {
-  if (preemph_coeff == 0.0) return;
-  KALDI_ASSERT(preemph_coeff >= 0.0 && preemph_coeff <= 1.0);
-  const int32 dim = waveform->Dim();
-  // The update of element 0 below would index past the end of an empty
-  // vector, so there is nothing to do in that case.
-  if (dim == 0) return;
-  // Walk backwards so that every x(i-1) read is still the unmodified sample.
-  for (int32 i = dim - 1; i > 0; i--)
-    (*waveform)(i) -= preemph_coeff * (*waveform)(i-1);
-  (*waveform)(0) -= preemph_coeff * (*waveform)(0);
-}
-
-// Fills 'window' with opts.WindowSize() coefficients of the window named by
-// opts.window_type.  An unrecognised name is reported through KALDI_ERR.
-FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts) {
-  const int32 num_points = opts.WindowSize();
-  KALDI_ASSERT(num_points > 0);
-  window.Resize(num_points);
-
-  const std::string &type = opts.window_type;
-  const double a = M_2PI / (num_points-1);
-  for (int32 n = 0; n < num_points; n++) {
-    const double i_fl = static_cast<double>(n);
-    if (type == "rectangular") {
-      window(n) = 1.0;
-    } else if (type == "povey") {  // like hamming but goes to zero at edges.
-      window(n) = pow(0.5 - 0.5*cos(a * i_fl), 0.85);
-    } else if (type == "hamming") {
-      window(n) = 0.54 - 0.46*cos(a * i_fl);
-    } else if (type == "hanning") {
-      window(n) = 0.5  - 0.5*cos(a * i_fl);
-    } else if (type == "blackman") {
-      window(n) = opts.blackman_coeff - 0.5*cos(a * i_fl) +
-        (0.5 - opts.blackman_coeff) * cos(2 * a * i_fl);
-    } else {
-      KALDI_ERR << "Invalid window type " << opts.window_type;
-    }
-  }
-}
-
-// Post-processes one extracted frame in place.  The steps run in this order,
-// each one only if enabled: dithering, DC-offset removal, measurement of the
-// log-energy (written to *log_energy_pre_window when it is non-NULL),
-// preemphasis; finally the frame is multiplied by the window function.
-void ProcessWindow(const FrameExtractionOptions &opts,
-                   const FeatureWindowFunction &window_function,
-                   VectorBase<BaseFloat> *window,
-                   BaseFloat *log_energy_pre_window) {
-  const int32 num_samples = opts.WindowSize();
-  KALDI_ASSERT(window->Dim() == num_samples);
-
-  if (opts.dither != 0.0) {
-    Dither(window, opts.dither);
-  }
-
-  if (opts.remove_dc_offset) {
-    // Subtract the mean of the frame from every sample.
-    window->Add(-window->Sum() / num_samples);
-  }
-
-  if (log_energy_pre_window != NULL) {
-    // Sum of squares, floored so that the logarithm stays finite.
-    const BaseFloat sum_of_squares = std::max<BaseFloat>(
-        VecVec(*window, *window), std::numeric_limits<float>::epsilon());
-    *log_energy_pre_window = Log(sum_of_squares);
-  }
-
-  if (opts.preemph_coeff != 0.0) {
-    Preemphasize(window, opts.preemph_coeff);
-  }
-
-  window->MulElements(window_function.window);
-}
-
-
-// ExtractWindow extracts a windowed frame of waveform with a power-of-two,
-// padded size.  It does mean subtraction, pre-emphasis and dithering as
-// requested.
-//
-// 'sample_offset' is the absolute index of wave(0); frame 'f' is located with
-// FirstSampleOfFrame().  'window' is resized to opts.PaddedWindowSize() if
-// needed, its first opts.WindowSize() entries receive the frame, the rest is
-// zeroed, and the frame part is then passed through ProcessWindow().
-void ExtractWindow(int64 sample_offset,
-                   const VectorBase<BaseFloat> &wave,
-                   int32 f,  // with 0 <= f < NumFrames(feats, opts)
-                   const FrameExtractionOptions &opts,
-                   const FeatureWindowFunction &window_function,
-                   Vector<BaseFloat> *window,
-                   BaseFloat *log_energy_pre_window) {
-  KALDI_ASSERT(sample_offset >= 0 && wave.Dim() != 0);
-  int32 frame_length = opts.WindowSize(),
-      frame_length_padded = opts.PaddedWindowSize();
-  // All three values below are absolute sample indexes (i.e. they include
-  // sample_offset); end_sample is one past the last sample of the frame.
-  int64 num_samples = sample_offset + wave.Dim(),
-      start_sample = FirstSampleOfFrame(f, opts),
-      end_sample = start_sample + frame_length;
-
-  if (opts.snip_edges) {
-    // With snip_edges the frame must lie entirely inside 'wave'.
-    KALDI_ASSERT(start_sample >= sample_offset &&
-                 end_sample <= num_samples);
-  } else {
-    // Without snip_edges the frame may start before the data only when
-    // nothing has been discarded on the left (sample_offset == 0).
-    KALDI_ASSERT(sample_offset == 0 || start_sample >= sample_offset);
-  }
-
-  // kUndefined: the contents are overwritten below, so no initialisation is
-  // requested here.
-  if (window->Dim() != frame_length_padded)
-    window->Resize(frame_length_padded, kUndefined);
-
-  // wave_start and wave_end are start and end indexes into 'wave', for the
-  // piece of wave that we're trying to extract.
-  int32 wave_start = int32(start_sample - sample_offset),
-      wave_end = wave_start + frame_length;
-  if (wave_start >= 0 && wave_end <= wave.Dim()) {
-    // the normal case-- no edge effects to consider.
-    window->Range(0, frame_length).CopyFromVec(
-        wave.Range(wave_start, frame_length));
-  } else {
-    // Deal with any end effects by reflection, if needed.  This code will only
-    // be reached for about two frames per utterance, so we don't concern
-    // ourselves excessively with efficiency.
-    int32 wave_dim = wave.Dim();
-    for (int32 s = 0; s < frame_length; s++) {
-      int32 s_in_wave = s + wave_start;
-      while (s_in_wave < 0 || s_in_wave >= wave_dim) {
-        // reflect around the beginning or end of the wave.
-        // e.g. -1 -> 0, -2 -> 1.
-        // dim -> dim - 1, dim + 1 -> dim - 2.
-        // the code supports repeated reflections, although this
-        // would only be needed in pathological cases.
-        if (s_in_wave < 0) s_in_wave = - s_in_wave - 1;
-        else s_in_wave = 2 * wave_dim - 1 - s_in_wave;
-      }
-      (*window)(s) = wave(s_in_wave);
-    }
-  }
-
-  // Zero the padding between the frame and the padded window size.
-  if (frame_length_padded > frame_length)
-    window->Range(frame_length, frame_length_padded - frame_length).SetZero();
-
-  // Only the unpadded part of the window is dithered/preemphasised/windowed.
-  SubVector<BaseFloat> frame(*window, 0, frame_length);
-
-  ProcessWindow(opts, window_function, &frame, log_energy_pre_window);
-}
-
-}  // namespace kaldi
--- a/speechx/speechx/kaldi/feat/feature-window.h
+++ b/speechx/speechx/kaldi/feat/feature-window.h
@ -1,223 +0,0 @@
-// feat/feature-window.h
-
-// Copyright 2009-2011  Karel Vesely;  Petr Motlicek;  Saarland University
-//           2014-2016  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_FEATURE_WINDOW_H_
-#define KALDI_FEAT_FEATURE_WINDOW_H_
-
-#include <map>
-#include <string>
-
-#include "matrix/matrix-lib.h"
-#include "util/common-utils.h"
-#include "base/kaldi-error.h"
-
-namespace kaldi {
-/// @addtogroup  feat FeatureExtraction
-/// @{
-
-/// Options that control how a waveform is cut into frames and how each frame
-/// is pre-processed (dithering, DC removal, preemphasis, windowing).
-struct FrameExtractionOptions {
-  BaseFloat samp_freq;  // Sample frequency of the waveform.
-  BaseFloat frame_shift_ms;  // in milliseconds.
-  BaseFloat frame_length_ms;  // in milliseconds.
-  BaseFloat dither;  // Amount of dithering, 0.0 means no dither.
-  BaseFloat preemph_coeff;  // Preemphasis coefficient.
-  bool remove_dc_offset;  // Subtract mean of wave before FFT.
-  std::string window_type;  // e.g. Hamming window
-  // May be "hamming", "rectangular", "povey", "hanning", "blackman"
-  // "povey" is a window I made to be similar to Hamming but to go to zero at the
-  // edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85)
-  // I just don't think the Hamming window makes sense as a windowing function.
-  bool round_to_power_of_two;  // Zero-pad the window to a power-of-two size.
-  BaseFloat blackman_coeff;  // Constant term of the generalized Blackman window.
-  bool snip_edges;  // Only emit frames that fit completely in the waveform.
-  bool allow_downsample;
-  bool allow_upsample;
-  int max_feature_vectors;  // If > 0, cap on retained feature vectors.
-  FrameExtractionOptions():
-      samp_freq(16000),
-      frame_shift_ms(10.0),
-      frame_length_ms(25.0),
-      dither(1.0),
-      preemph_coeff(0.97),
-      remove_dc_offset(true),
-      window_type("povey"),
-      round_to_power_of_two(true),
-      blackman_coeff(0.42),
-      snip_edges(true),
-      allow_downsample(false),
-      allow_upsample(false),
-      max_feature_vectors(-1)
-      { }
-
-  /// Registers every option of this struct with the given options interface.
-  void Register(OptionsItf *opts) {
-    opts->Register("sample-frequency", &samp_freq,
-                   "Waveform data sample frequency (must match the waveform file, "
-                   "if specified there)");
-    opts->Register("frame-length", &frame_length_ms, "Frame length in milliseconds");
-    opts->Register("frame-shift", &frame_shift_ms, "Frame shift in milliseconds");
-    opts->Register("preemphasis-coefficient", &preemph_coeff,
-                   "Coefficient for use in signal preemphasis");
-    opts->Register("remove-dc-offset", &remove_dc_offset,
-                   "Subtract mean from waveform on each frame");
-    opts->Register("dither", &dither, "Dithering constant (0.0 means no dither). "
-                   "If you turn this off, you should set the --energy-floor "
-                   "option, e.g. to 1.0 or 0.1");
-    // The names listed here must match the strings accepted by the
-    // FeatureWindowFunction constructor, which spells the last one "blackman".
-    opts->Register("window-type", &window_type, "Type of window "
-                   "(\"hamming\"|\"hanning\"|\"povey\"|\"rectangular\""
-                   "|\"blackman\")");
-    opts->Register("blackman-coeff", &blackman_coeff,
-                   "Constant coefficient for generalized Blackman window.");
-    opts->Register("round-to-power-of-two", &round_to_power_of_two,
-                   "If true, round window size to power of two by zero-padding "
-                   "input to FFT.");
-    opts->Register("snip-edges", &snip_edges,
-                   "If true, end effects will be handled by outputting only frames that "
-                   "completely fit in the file, and the number of frames depends on the "
-                   "frame-length.  If false, the number of frames depends only on the "
-                   "frame-shift, and we reflect the data at the ends.");
-    opts->Register("allow-downsample", &allow_downsample,
-                   "If true, allow the input waveform to have a higher frequency than "
-                   "the specified --sample-frequency (and we'll downsample).");
-    opts->Register("max-feature-vectors", &max_feature_vectors,
-                   "Memory optimization. If larger than 0, periodically remove feature "
-                   "vectors so that only this number of the latest feature vectors is "
-                   "retained.");
-    opts->Register("allow-upsample", &allow_upsample,
-                   "If true, allow the input waveform to have a lower frequency than "
-                   "the specified --sample-frequency (and we'll upsample).");
-  }
-  /// Frame shift in samples (truncated toward zero).
-  int32 WindowShift() const {
-    return static_cast<int32>(samp_freq * 0.001 * frame_shift_ms);
-  }
-  /// Frame length in samples (truncated toward zero).
-  int32 WindowSize() const {
-    return static_cast<int32>(samp_freq * 0.001 * frame_length_ms);
-  }
-  /// Frame length after optional rounding up to a power of two.
-  int32 PaddedWindowSize() const {
-    return (round_to_power_of_two ? RoundUpToNearestPowerOfTwo(WindowSize()) :
-                                    WindowSize());
-  }
-};
-
-
-/// Holds the coefficients of the window function that ProcessWindow()
-/// multiplies each frame by.
-struct FeatureWindowFunction {
-  // Leaves 'window' default-constructed.
-  FeatureWindowFunction() {}
-  // Builds the window of opts.WindowSize() points selected by opts.window_type.
-  explicit FeatureWindowFunction(const FrameExtractionOptions &opts);
-  FeatureWindowFunction(const FeatureWindowFunction &other):
-      window(other.window) { }
-  Vector<BaseFloat> window;  // one coefficient per sample of the frame
-};
-
-
-/**
-   This function returns the number of frames that we can extract from a wave
-   file with the given number of samples in it (assumed to have the same
-   sampling rate as specified in 'opts').
-
-      @param [in] num_samples  The number of samples in the wave file.
-      @param [in] opts     The frame-extraction options class
-
-      @param [in] flush   True if we are asserting that this number of samples is
-             'all there is', false if we are expecting more data to possibly come
-             in.  This only makes a difference to the answer if opts.snip_edges
-             == false.  For offline feature extraction you always want flush ==
-             true.  In an online-decoding context, once you know (or decide) that
-             no more data is coming in, you'd call it with flush == true at the
-             end to flush out any remaining data.
-*/
-int32 NumFrames(int64 num_samples,
-                const FrameExtractionOptions &opts,
-                bool flush = true);
-
-/*
-   This function returns the index of the first sample of the frame indexed
-   'frame'.  If snip-edges=true, it just returns frame * opts.WindowShift(); if
-   snip-edges=false, the formula is a little more complicated and the result may
-   be negative.
-*/
-int64 FirstSampleOfFrame(int32 frame,
-                         const FrameExtractionOptions &opts);
-
-
-
-/// Adds RandGauss() * dither_value to each sample of 'waveform' in place;
-/// does nothing when dither_value is 0.0.
-void Dither(VectorBase<BaseFloat> *waveform, BaseFloat dither_value);
-
-/// Applies preemphasis in place: each sample has preemph_coeff times the
-/// previous sample subtracted from it (sample 0 uses itself as the previous
-/// sample).  Does nothing when preemph_coeff is 0.0; otherwise the
-/// coefficient is asserted to lie in [0, 1].
-void Preemphasize(VectorBase<BaseFloat> *waveform, BaseFloat preemph_coeff);
-
-/**
-  This function does all the windowing steps after actually
-  extracting the windowed signal: depending on the
-  configuration, it does dithering, dc offset removal,
-  preemphasis, and multiplication by the windowing function.
-   @param [in] opts  The options class to be used
-   @param [in] window_function  The windowing function-- should have
-                    been initialized using 'opts'.
-   @param [in,out] window  A vector of size opts.WindowSize().  Note:
-      it will typically be a sub-vector of a larger vector of size
-      opts.PaddedWindowSize(), with the remaining samples zero,
-      as the FFT code is more efficient if it operates on data with
-      power-of-two size.
-   @param [out]   log_energy_pre_window If non-NULL, then after dithering and
-      DC offset removal, this function will write to this pointer the log of
-      the total energy (i.e. sum-squared) of the frame.
- */
-void ProcessWindow(const FrameExtractionOptions &opts,
-                   const FeatureWindowFunction &window_function,
-                   VectorBase<BaseFloat> *window,
-                   BaseFloat *log_energy_pre_window = NULL);
-
-
-/*
-  ExtractWindow() extracts a windowed frame of waveform (possibly with a
-  power-of-two, padded size, depending on the config), including all the
-  processing done by ProcessWindow().
-
-  @param [in] sample_offset  If 'wave' is not the entire waveform, but
-                   part of it to the left has been discarded, then the
-                   number of samples prior to 'wave' that we have
-                   already discarded.  Set this to zero if you are
-                   processing the entire waveform in one piece, or
-                   if you get 'no matching function' compilation
-                   errors when updating the code.
-  @param [in] wave  The waveform
-  @param [in] f     The frame index to be extracted, with
-                    0 <= f < NumFrames(sample_offset + wave.Dim(), opts, true)
-  @param [in] opts  The options class to be used
-  @param [in] window_function  The windowing function, as derived from the
-                    options class.
-  @param [out] window  The windowed, possibly-padded waveform to be
-                     extracted.  Will be resized as needed.
-  @param [out] log_energy_pre_window  If non-NULL, the log-energy of
-                   the signal prior to pre-emphasis and multiplying by
-                   the windowing function will be written to here.
-*/
-void ExtractWindow(int64 sample_offset,
-                   const VectorBase<BaseFloat> &wave,
-                   int32 f,
-                   const FrameExtractionOptions &opts,
-                   const FeatureWindowFunction &window_function,
-                   Vector<BaseFloat> *window,
-                   BaseFloat *log_energy_pre_window = NULL);
-
-
-/// @} End of "addtogroup feat"
-}  // namespace kaldi
-
-
-#endif  // KALDI_FEAT_FEATURE_WINDOW_H_
--- a/speechx/speechx/kaldi/feat/mel-computations.cc
+++ b/speechx/speechx/kaldi/feat/mel-computations.cc
@ -1,340 +0,0 @@
-// feat/mel-computations.cc
-
-// Copyright 2009-2011  Phonexia s.r.o.;  Karel Vesely;  Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <float.h>
-#include <algorithm>
-#include <iostream>
-
-#include "feat/feature-functions.h"
-#include "feat/feature-window.h"
-#include "feat/mel-computations.h"
-
-namespace kaldi {
-
-
-// Builds the mel filterbank.  opts.num_bins filters are placed at equal
-// spacing on the mel scale between the low and high cutoff frequencies; each
-// filter rises linearly (in mel) from its left edge to its centre and falls
-// linearly to its right edge.  When vtln_warp_factor != 1.0 the three edges
-// of every filter are passed through VtlnWarpMelFreq() first.
-//
-// For each filter only the non-zero stretch of weights is stored in bins_,
-// together with the index of the first FFT bin it applies to.
-// Invalid options are reported through KALDI_ERR.
-MelBanks::MelBanks(const MelBanksOptions &opts,
-                   const FrameExtractionOptions &frame_opts,
-                   BaseFloat vtln_warp_factor):
-    htk_mode_(opts.htk_mode) {
-  int32 num_bins = opts.num_bins;
-  if (num_bins < 3) KALDI_ERR << "Must have at least 3 mel bins";
-  BaseFloat sample_freq = frame_opts.samp_freq;
-  int32 window_length_padded = frame_opts.PaddedWindowSize();
-  KALDI_ASSERT(window_length_padded % 2 == 0);
-  int32 num_fft_bins = window_length_padded / 2;
-  BaseFloat nyquist = 0.5 * sample_freq;
-
-  // A non-positive high_freq option is interpreted as an offset from Nyquist.
-  BaseFloat low_freq = opts.low_freq, high_freq;
-  if (opts.high_freq > 0.0)
-    high_freq = opts.high_freq;
-  else
-    high_freq = nyquist + opts.high_freq;
-
-  if (low_freq < 0.0 || low_freq >= nyquist
-      || high_freq <= 0.0 || high_freq > nyquist
-      || high_freq <= low_freq)
-    KALDI_ERR << "Bad values in options: low-freq " << low_freq
-              << " and high-freq " << high_freq << " vs. nyquist "
-              << nyquist;
-
-  BaseFloat fft_bin_width = sample_freq / window_length_padded;
-  // fft-bin width [think of it as Nyquist-freq / half-window-length]
-
-  BaseFloat mel_low_freq = MelScale(low_freq);
-  BaseFloat mel_high_freq = MelScale(high_freq);
-
-  debug_ = opts.debug_mel;
-
-  // divide by num_bins+1 in next line because of end-effects where the bins
-  // spread out to the sides.
-  BaseFloat mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins+1);
-
-  // A negative vtln_high option is interpreted as an offset from Nyquist.
-  BaseFloat vtln_low = opts.vtln_low,
-      vtln_high = opts.vtln_high;
-  if (vtln_high < 0.0) {
-    vtln_high += nyquist;
-  }
-
-  if (vtln_warp_factor != 1.0 &&
-      (vtln_low < 0.0 || vtln_low <= low_freq
-       || vtln_low >= high_freq
-       || vtln_high <= 0.0 || vtln_high >= high_freq
-       || vtln_high <= vtln_low))
-    KALDI_ERR << "Bad values in options: vtln-low " << vtln_low
-              << " and vtln-high " << vtln_high << ", versus "
-              << "low-freq " << low_freq << " and high-freq "
-              << high_freq;
-
-  bins_.resize(num_bins);
-  center_freqs_.Resize(num_bins);
-
-  for (int32 bin = 0; bin < num_bins; bin++) {
-    BaseFloat left_mel = mel_low_freq + bin * mel_freq_delta,
-        center_mel = mel_low_freq + (bin + 1) * mel_freq_delta,
-        right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
-
-    if (vtln_warp_factor != 1.0) {
-      left_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
-                                 vtln_warp_factor, left_mel);
-      center_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
-                                 vtln_warp_factor, center_mel);
-      right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
-                                  vtln_warp_factor, right_mel);
-    }
-    center_freqs_(bin) = InverseMelScale(center_mel);
-    // this_bin will be a vector of coefficients that is only
-    // nonzero where this mel bin is active.
-    Vector<BaseFloat> this_bin(num_fft_bins);
-    int32 first_index = -1, last_index = -1;
-    for (int32 i = 0; i < num_fft_bins; i++) {
-      BaseFloat freq = (fft_bin_width * i);  // Center frequency of this fft
-                                             // bin.
-      BaseFloat mel = MelScale(freq);
-      if (mel > left_mel && mel < right_mel) {
-        BaseFloat weight;
-        if (mel <= center_mel)
-          weight = (mel - left_mel) / (center_mel - left_mel);
-        else
-         weight = (right_mel-mel) / (right_mel-center_mel);
-        this_bin(i) = weight;
-        if (first_index == -1)
-          first_index = i;
-        last_index = i;
-      }
-    }
-
-    // No FFT bin fell strictly between left_mel and right_mel (this can
-    // happen when --num-mel-bins is too large for the FFT size).  Without
-    // this check first_index stays at -1, the size computed below becomes 1
-    // and Range(-1, 1) would address the element before the start of
-    // this_bin.  Store an empty filter at offset 0 instead; bins_[bin].second
-    // is still the empty vector created by bins_.resize() above.
-    // NOTE(review): Compute() then takes a zero-length dot product for this
-    // bin -- confirm that VecVec() accepts empty vectors.
-    if (first_index == -1) {
-      bins_[bin].first = 0;
-      continue;
-    }
-
-    bins_[bin].first = first_index;
-    int32 size = last_index + 1 - first_index;
-    bins_[bin].second.Resize(size);
-    bins_[bin].second.CopyFromVec(this_bin.Range(first_index, size));
-
-    // Replicate a bug in HTK, for testing purposes.
-    if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0)
-      bins_[bin].second(0) = 0.0;
-
-  }
-  if (debug_) {
-    for (size_t i = 0; i < bins_.size(); i++) {
-      KALDI_LOG << "bin " << i << ", offset = " << bins_[i].first
-                << ", vec = " << bins_[i].second;
-    }
-  }
-}
-
-// Copy constructor: member-wise copy of the centre frequencies, the stored
-// filter weights, and the debug / HTK-mode flags.
-MelBanks::MelBanks(const MelBanks &other):
-    center_freqs_(other.center_freqs_),
-    bins_(other.bins_),
-    debug_(other.debug_),
-    htk_mode_(other.htk_mode_) { }
-
-// VTLN frequency warp.  'vtln_low_cutoff' / 'vtln_high_cutoff' are the lower
-// and upper cutoffs for VTLN; 'low_freq' / 'high_freq' are the lower and
-// upper cutoffs used in the mel computation.
-BaseFloat MelBanks::VtlnWarpFreq(BaseFloat vtln_low_cutoff,
-                                 BaseFloat vtln_high_cutoff,
-                                 BaseFloat low_freq,
-                                 BaseFloat high_freq,
-                                 BaseFloat vtln_warp_factor,
-                                 BaseFloat freq) {
-  // The warping function computed here is not HTK's, but it takes similar
-  // inputs and has the advantage of never producing empty bins.
-  //
-  // It is a function F(freq) on [low_freq, high_freq] such that
-  //   F(low_freq) == low_freq  and  F(high_freq) == high_freq,
-  // continuous and piecewise linear with two inflection points l < h
-  // (both measured in unwarped frequency).  Between them,
-  //   F(f) = f / vtln_warp_factor          for l <= f <= h.
-  //
-  // Upper inflection point h: it is chosen so that
-  //   max(h, F(h)) == vtln_high_cutoff.
-  // Since F(h) == h / vtln_warp_factor this gives
-  //   max(h, h / vtln_warp_factor) == vtln_high_cutoff, hence
-  //   h = vtln_high_cutoff / max(1, 1 / vtln_warp_factor)
-  //     = vtln_high_cutoff * min(1, vtln_warp_factor).
-  //
-  // Lower inflection point l: it is chosen so that
-  //   min(l, F(l)) == vtln_low_cutoff, hence
-  //   l = vtln_low_cutoff / min(1, 1 / vtln_warp_factor)
-  //     = vtln_low_cutoff * max(1, vtln_warp_factor).
-
-  // Frequencies outside the mel range are returned unchanged.
-  if (freq < low_freq || freq > high_freq) return freq;
-
-  KALDI_ASSERT(vtln_low_cutoff > low_freq &&
-               "be sure to set the --vtln-low option higher than --low-freq");
-  KALDI_ASSERT(vtln_high_cutoff < high_freq &&
-               "be sure to set the --vtln-high option lower than --high-freq [or negative]");
-
-  const BaseFloat unity = 1.0;
-  const BaseFloat lower_knee = vtln_low_cutoff * std::max(unity, vtln_warp_factor);
-  const BaseFloat upper_knee = vtln_high_cutoff * std::min(unity, vtln_warp_factor);
-  const BaseFloat center_slope = 1.0 / vtln_warp_factor;
-  const BaseFloat warped_lower = center_slope * lower_knee;  // F(l)
-  const BaseFloat warped_upper = center_slope * upper_knee;  // F(h)
-  KALDI_ASSERT(lower_knee > low_freq && upper_knee < high_freq);
-
-  if (freq < lower_knee) {
-    // Left segment: the line from (low_freq, low_freq) to (l, F(l)).
-    const BaseFloat left_slope =
-        (warped_lower - low_freq) / (lower_knee - low_freq);
-    return low_freq + left_slope * (freq - low_freq);
-  }
-  if (freq < upper_knee) {
-    // Centre segment: plain scaling.
-    return center_slope * freq;
-  }
-  // Right segment (freq >= h): the line from (h, F(h)) to
-  // (high_freq, high_freq).
-  const BaseFloat right_slope =
-      (high_freq - warped_upper) / (high_freq - upper_knee);
-  return high_freq + right_slope * (freq - high_freq);
-}
-
-// Warps a frequency given on the mel scale: the value is converted to Hz with
-// InverseMelScale(), passed through VtlnWarpFreq(), and converted back with
-// MelScale().
-//   vtln_low_cutoff, vtln_high_cutoff - frequency cutoffs for VTLN
-//   low_freq, high_freq               - frequency cutoffs of the mel computation
-BaseFloat MelBanks::VtlnWarpMelFreq(BaseFloat vtln_low_cutoff,
-                                    BaseFloat vtln_high_cutoff,
-                                    BaseFloat low_freq,
-                                    BaseFloat high_freq,
-                                    BaseFloat vtln_warp_factor,
-                                    BaseFloat mel_freq) {
-  const BaseFloat freq_hz = InverseMelScale(mel_freq);
-  const BaseFloat warped_hz = VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff,
-                                           low_freq, high_freq,
-                                           vtln_warp_factor, freq_hz);
-  return MelScale(warped_hz);
-}
-
-
-// Applies every mel filter to "power_spectrum" (the FFT energies) and stores
-// the resulting energy of bin i in (*mel_energies_out)(i).  The output vector
-// must already have one element per bin.
-void MelBanks::Compute(const VectorBase<BaseFloat> &power_spectrum,
-                       VectorBase<BaseFloat> *mel_energies_out) const {
-  const int32 num_bins = bins_.size();
-  KALDI_ASSERT(mel_energies_out->Dim() == num_bins);
-
-  for (int32 bin = 0; bin < num_bins; bin++) {
-    // Each entry of bins_ is (first nonzero fft-bin, filter weights).
-    const int32 first_fft_bin = bins_[bin].first;
-    const Vector<BaseFloat> &weights = bins_[bin].second;
-    BaseFloat energy =
-        VecVec(weights, power_spectrum.Range(first_fft_bin, weights.Dim()));
-    // HTK-like flooring, for testing purposes (dither is preferred).
-    if (htk_mode_ && energy < 1.0) {
-      energy = 1.0;
-    }
-    (*mel_energies_out)(bin) = energy;
-
-    // Catches NaNs early; added after a bug once seen in OpenBLAS.
-    KALDI_ASSERT(!KALDI_ISNAN((*mel_energies_out)(bin)));
-  }
-
-  if (!debug_) return;
-
-  fprintf(stderr, "MEL BANKS:\n");
-  for (int32 bin = 0; bin < num_bins; bin++)
-    fprintf(stderr, " %f", (*mel_energies_out)(bin));
-  fprintf(stderr, "\n");
-}
-
-// Fills *coeffs with the liftering coefficients (scaling applied to cepstral
-// coefficients).  Numbering differs slightly from HTK: index zero is C0,
-// whose coefficient evaluates to 1.0 and is therefore not affected.
-void ComputeLifterCoeffs(BaseFloat Q, VectorBase<BaseFloat> *coeffs) {
-  const int32 dim = coeffs->Dim();
-  for (int32 k = 0; k < dim; k++) {
-    (*coeffs)(k) = 1.0 + 0.5 * Q * sin(M_PI * k / Q);
-  }
-}
-
-
-// Durbin's recursion - converts autocorrelation coefficients to the LPC
-// pTmp - temporary workspace [n]
-// pAC - autocorrelation coefficients [n + 1]
-// pLP - linear prediction coefficients [n] (predicted_sn = sum_1^P{a[i-1] * s[n-i]}})
-//       F(z) = 1 / (1 - A(z)), 1 is not stored in the denominator
-// Returns E, which starts at pAC[0] and is multiplied by the floored factor
-// (1 - ki^2) on every iteration.
-BaseFloat Durbin(int n, const BaseFloat *pAC, BaseFloat *pLP, BaseFloat *pTmp) {
-  BaseFloat ki;                // reflection coefficient
-  int i;
-  int j;
-
-  BaseFloat E = pAC[0];
-
-  for (i = 0; i < n; i++) {
-    // next reflection coefficient
-    ki = pAC[i + 1];
-    for (j = 0; j < i; j++)
-      ki += pLP[j] * pAC[i - j];
-    ki = ki / E;
-
-    // new error
-    BaseFloat c = 1 - ki * ki;
-    if (c < 1.0e-5) // floor the factor to avoid NaNs for a constant signal
-      c = 1.0e-5;
-    E *= c;
-
-    // new LP coefficients, built in pTmp first because each new value
-    // reads the previous values of pLP
-    pTmp[i] = -ki;
-    for (j = 0; j < i; j++)
-      pTmp[j] = pLP[j] - ki * pLP[i - j - 1];
-
-    // copy the updated coefficients 0..i back into pLP
-    for (j = 0; j <= i; j++)
-      pLP[j] = pTmp[j];
-  }
-
-  return E;
-}
-
-
-// Converts n LPC coefficients (pLPC) into n cepstral coefficients (pCepst).
-// pCepst[k] is computed from pLPC[0..k] and the already written
-// pCepst[0..k-1], so the output is filled in increasing order.
-void Lpc2Cepstrum(int n, const BaseFloat *pLPC, BaseFloat *pCepst) {
-  for (int32 k = 0; k < n; k++) {
-    double acc = 0.0;  // accumulated in double precision
-    for (int m = 0; m < k; m++)
-      acc += static_cast<BaseFloat>(k - m) * pLPC[m] * pCepst[k - m - 1];
-    pCepst[k] = -pLPC[k] - acc / static_cast<BaseFloat>(k + 1);
-  }
-}
-
-// Resizes *ans to the number of mel bins and stores, for each bin, a weight
-// computed from the square of that bin's center frequency.
-void GetEqualLoudnessVector(const MelBanks &mel_banks,
-                            Vector<BaseFloat> *ans) {
-  // Central frequency of each mel bin.
-  const Vector<BaseFloat> &center_freqs = mel_banks.GetCenterFreqs();
-  const int32 num_bins = mel_banks.NumBins();
-  ans->Resize(num_bins);
-  for (int32 k = 0; k < num_bins; k++) {
-    const BaseFloat fsq = center_freqs(k) * center_freqs(k);
-    const BaseFloat ratio = fsq / (fsq + 1.6e5);
-    (*ans)(k) = ratio * ratio * ((fsq + 1.44e6) / (fsq + 9.61e6));
-  }
-}
-
-
-// Computes LP coefficients from autocorrelation coefficients via Durbin().
-// autocorr_in holds n + 1 values and *lpc_out must already have dimension n.
-// Returns -Log(1.0 / E), where E is the value returned by Durbin(); this
-// forms the C0 value.
-BaseFloat ComputeLpc(const VectorBase<BaseFloat> &autocorr_in,
-                     Vector<BaseFloat> *lpc_out) {
-  const int32 order = autocorr_in.Dim() - 1;
-  KALDI_ASSERT(lpc_out->Dim() == order);
-
-  Vector<BaseFloat> scratch(order);  // workspace required by Durbin()
-  const BaseFloat residual = Durbin(order, autocorr_in.Data(),
-                                    lpc_out->Data(), scratch.Data());
-  if (residual <= 0.0) {
-    KALDI_WARN << "Zero energy in LPC computation";
-  }
-  return -Log(1.0 / residual);
-}
-
-
-}  // namespace kaldi
--- a/speechx/speechx/kaldi/feat/mel-computations.h
+++ b/speechx/speechx/kaldi/feat/mel-computations.h
@ -1,171 +0,0 @@
-// feat/mel-computations.h
-
-// Copyright 2009-2011  Phonexia s.r.o.;  Microsoft Corporation
-//                2016  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_MEL_COMPUTATIONS_H_
-#define KALDI_FEAT_MEL_COMPUTATIONS_H_
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <complex>
-#include <utility>
-#include <vector>
-
-#include "base/kaldi-common.h"
-#include "util/common-utils.h"
-#include "matrix/matrix-lib.h"
-
-
-namespace kaldi {
-/// @addtogroup  feat FeatureExtraction
-/// @{
-
-struct FrameExtractionOptions;  // defined in feature-window.h
-
-
-// Options that configure the triangular mel filterbank.
-struct MelBanksOptions {
-  int32 num_bins;  // e.g. 25; number of triangular bins
-  BaseFloat low_freq;  // e.g. 20; lower frequency cutoff
-  BaseFloat high_freq;  // an upper frequency cutoff; 0 -> no cutoff, negative
-  // ->added to the Nyquist frequency to get the cutoff.
-  BaseFloat vtln_low;  // vtln lower cutoff of warping function.
-  BaseFloat vtln_high;  // vtln upper cutoff of warping function: if negative, added
-                        // to the Nyquist frequency to get the cutoff.
-  bool debug_mel;
-  // htk_mode is a "hidden" config, it does not show up on command line.
-  // Enables more exact compatibility with HTK, for testing purposes.  Affects
-  // mel-energy flooring and reproduces a bug in HTK.
-  bool htk_mode;
-
-  // Sets the defaults; only the number of bins can be chosen by the caller.
-  explicit MelBanksOptions(int num_bins = 25)
-      : num_bins(num_bins), low_freq(20), high_freq(0), vtln_low(100),
-        vtln_high(-500), debug_mel(false), htk_mode(false) {}
-
-  // Registers every option except the hidden htk_mode with "opts".
-  void Register(OptionsItf *opts) {
-    opts->Register("num-mel-bins", &num_bins,
-                   "Number of triangular mel-frequency bins");
-    opts->Register("low-freq", &low_freq,
-                   "Low cutoff frequency for mel bins");
-    opts->Register("high-freq", &high_freq,
-                   "High cutoff frequency for mel bins (if <= 0, offset from Nyquist)");
-    opts->Register("vtln-low", &vtln_low,
-                   "Low inflection point in piecewise linear VTLN warping function");
-    // The closing parenthesis of this help text used to be missing.
-    opts->Register("vtln-high", &vtln_high,
-                   "High inflection point in piecewise linear VTLN warping function"
-                   " (if negative, offset from high-mel-freq)");
-    opts->Register("debug-mel", &debug_mel,
-                   "Print out debugging information for mel bin computation");
-  }
-};
-
-
-// Holds the mel filterbank (one weight vector per bin) and applies it to FFT
-// energies.  Also provides the mel-scale conversions and the VTLN warping
-// functions as static helpers.
-class MelBanks {
- public:
-
-  // Converts a mel-scale value back to a frequency (inverse of MelScale()).
-  static inline BaseFloat InverseMelScale(BaseFloat mel_freq) {
-    return 700.0f * (expf (mel_freq / 1127.0f) - 1.0f);
-  }
-
-  // Converts a frequency to the mel scale.
-  static inline BaseFloat MelScale(BaseFloat freq) {
-    return 1127.0f * logf (1.0f + freq / 700.0f);
-  }
-
-  // Applies the VTLN warping function to "freq".
-  static BaseFloat VtlnWarpFreq(BaseFloat vtln_low_cutoff,
-                                BaseFloat vtln_high_cutoff,  // discontinuities in warp func
-                                BaseFloat low_freq,
-                                BaseFloat high_freq,  // upper+lower frequency cutoffs in
-                                // the mel computation
-                                BaseFloat vtln_warp_factor,
-                                BaseFloat freq);
-
-  // Same as VtlnWarpFreq(), but the input and the result are on the mel scale.
-  static BaseFloat VtlnWarpMelFreq(BaseFloat vtln_low_cutoff,
-                                   BaseFloat vtln_high_cutoff,
-                                   BaseFloat low_freq,
-                                   BaseFloat high_freq,
-                                   BaseFloat vtln_warp_factor,
-                                   BaseFloat mel_freq);
-
-
-  MelBanks(const MelBanksOptions &opts,
-           const FrameExtractionOptions &frame_opts,
-           BaseFloat vtln_warp_factor);
-
-  /// Compute Mel energies (note: not log energies).
-  /// At input, "fft_energies" contains the FFT energies (not log).
-  /// "mel_energies_out" must already have NumBins() elements.
-  void Compute(const VectorBase<BaseFloat> &fft_energies,
-               VectorBase<BaseFloat> *mel_energies_out) const;
-
-  // Number of mel bins.
-  int32 NumBins() const { return bins_.size(); }
-
-  // returns vector of central freq of each bin; needed by plp code.
-  const Vector<BaseFloat> &GetCenterFreqs() const { return center_freqs_; }
-
-  // Read-only access to the (first fft-bin, weights) pair of every bin.
-  const std::vector<std::pair<int32, Vector<BaseFloat> > >& GetBins() const {
-    return bins_;
-  }
-
-  // Copy constructor
-  MelBanks(const MelBanks &other);
- private:
-  // Disallow assignment
-  MelBanks &operator = (const MelBanks &other);
-
-  // center frequencies of bins, numbered from 0 ... num_bins-1.
-  // Needed by GetCenterFreqs().
-  Vector<BaseFloat> center_freqs_;
-
-  // the "bins_" vector is a vector, one for each bin, of a pair:
-  // (the first nonzero fft-bin), (the vector of weights).
-  std::vector<std::pair<int32, Vector<BaseFloat> > > bins_;
-
-  // When true, Compute() prints the mel energies to stderr.
-  bool debug_;
-  // When true, Compute() floors every mel energy at 1.0 (HTK-like).
-  bool htk_mode_;
-};
-
-
-// Compute liftering coefficients (scaling on cepstral coeffs)
-// coeffs are numbered slightly differently from HTK: the zeroth
-// index is C0, which is not affected.
-void ComputeLifterCoeffs(BaseFloat Q, VectorBase<BaseFloat> *coeffs);
-
-
-// Durbin's recursion - converts autocorrelation coefficients to the LPC
-// pTmp - temporary workspace [n]
-// pAC - autocorrelation coefficients [n + 1]
-// pLP - linear prediction coefficients [n] (predicted_sn = sum_1^P{a[i-1] * s[n-i]}})
-//       F(z) = 1 / (1 - A(z)), 1 is not stored in the denominator
-// Returns the energy of the residual (the energy itself, not its log).
-BaseFloat Durbin(int n, const BaseFloat *pAC, BaseFloat *pLP, BaseFloat *pTmp);
-
-// Compute LP coefficients from autocorrelation coefficients.
-// autocorr_in has n + 1 elements and lpc_out must already have n elements.
-// Returns the log of the value returned by Durbin().
-BaseFloat ComputeLpc(const VectorBase<BaseFloat> &autocorr_in,
-                     Vector<BaseFloat> *lpc_out);
-
-// Converts n LPC coefficients (pLPC) to n cepstral coefficients (pCepst).
-void Lpc2Cepstrum(int n, const BaseFloat *pLPC, BaseFloat *pCepst);
-
-
-
-// Resizes *ans to mel_banks.NumBins() and fills it with one weight per bin,
-// computed from the bin's center frequency.
-void GetEqualLoudnessVector(const MelBanks &mel_banks,
-                            Vector<BaseFloat> *ans);
-
-/// @} End of "addtogroup feat"
-}  // namespace kaldi
-
-#endif  // KALDI_FEAT_MEL_COMPUTATIONS_H_
--- a/speechx/speechx/kaldi/feat/online-feature-itf.h
+++ b/speechx/speechx/kaldi/feat/online-feature-itf.h
@ -1,125 +0,0 @@
-// feat/online-feature-itf.h
-
-// Copyright    2013  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_ONLINE_FEATURE_ITF_H_
-#define KALDI_FEAT_ONLINE_FEATURE_ITF_H_ 1
-#include "base/kaldi-common.h"
-#include "matrix/matrix-lib.h"
-
-namespace kaldi {
-/// @ingroup Interfaces
-/// @{
-
-/**
-   OnlineFeatureInterface is an interface for online feature processing (it is
-   also usable in the offline setting, but currently we're not using it for
-   that).  This is for use in the online2/ directory, and it supersedes the
-   interface in ../online/online-feat-input.h.  We have a slightly different
-   model that puts more control in the hands of the calling thread, and won't
-   involve waiting on semaphores in the decoding thread.
-
-   This interface only specifies how the object *outputs* the features.
-   How it obtains the features, e.g. from a previous object or objects of type
-   OnlineFeatureInterface, is not specified in the interface and you will
-   likely define new constructors or methods in the derived type to do that.
-
-   You should appreciate that this interface is designed to allow random
-   access to features, as long as they are ready.  That is, the user
-   can call GetFrame for any frame less than NumFramesReady(), and when
-   implementing a child class you must not make assumptions about the
-   order in which the user makes these calls.
-*/
-
-// Abstract interface through which online feature extractors expose their
-// output frames; see the description above for the access model.
-class OnlineFeatureInterface {
- public:
-  virtual int32 Dim() const = 0; /// returns the feature dimension.
-
-  /// Returns the total number of frames, since the start of the utterance, that
-  /// are now available.  In an online-decoding context, this will likely
-  /// increase with time as more data becomes available.
-  virtual int32 NumFramesReady() const = 0;
-
-  /// Returns true if this is the last frame.  Frame indices are zero-based, so the
-  /// first frame is zero.  IsLastFrame(-1) will return false, unless the file
-  /// is empty (which is a case that I'm not sure all the code will handle, so
-  /// be careful).  This function may return false for some frame if
-  /// we haven't yet decided to terminate decoding, but later true if we decide
-  /// to terminate decoding.  This function exists mainly to correctly handle
-  /// end effects in feature extraction, and is not a mechanism to determine how
-  /// many frames are in the decodable object (as it used to be, and for backward
-  /// compatibility, still is, in the Decodable interface).
-  virtual bool IsLastFrame(int32 frame) const = 0;
-
-  /// Gets the feature vector for this frame.  Before calling this for a given
-  /// frame, it is assumed that you called NumFramesReady() and it returned a
-  /// number greater than "frame".  Otherwise this call will likely crash with
-  /// an assert failure.  This function is not declared const, in case there is
-  /// some kind of caching going on, but most of the time it shouldn't modify
-  /// the class.
-  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat) = 0;
-
-
-  /// This is like GetFrame() but for a collection of frames.  There is a
-  /// default implementation that just gets the frames one by one, but it
-  /// may be overridden for efficiency by child classes (since sometimes
-  /// it's more efficient to do things in a batch).
-  /// "feats" must have exactly frames.size() rows; the i-th requested frame
-  /// is written through SubVector<BaseFloat>(*feats, i).
-  virtual void GetFrames(const std::vector<int32> &frames,
-                         MatrixBase<BaseFloat> *feats) {
-    KALDI_ASSERT(static_cast<int32>(frames.size()) == feats->NumRows());
-    for (size_t i = 0; i < frames.size(); i++) {
-      SubVector<BaseFloat> feat(*feats, i);
-      GetFrame(frames[i], &feat);
-    }
-  }
-
-
-  // Returns frame shift in seconds.  Helps to estimate duration from frame
-  // counts.
-  virtual BaseFloat FrameShiftInSeconds() const = 0;
-
-  /// Virtual destructor.  Note: constructors that take another member of
-  /// type OnlineFeatureInterface are not expected to take ownership of
-  /// that pointer; the caller needs to keep track of that manually.
-  virtual ~OnlineFeatureInterface() { }
-
-};
-
-
-/// Add a virtual class for "source" features such as MFCC or PLP or pitch
-/// features.  It extends OnlineFeatureInterface with the two calls used to
-/// feed waveform data into the extractor.
-class OnlineBaseFeature: public OnlineFeatureInterface {
- public:
-  /// This would be called from the application, when you get more wave data.
-  /// Note: the sampling_rate is typically only provided so the code can assert
-  /// that it matches the sampling rate expected in the options.
-  virtual void AcceptWaveform(BaseFloat sampling_rate,
-                              const VectorBase<BaseFloat> &waveform) = 0;
-
-  /// InputFinished() tells the class you won't be providing any
-  /// more waveform.  This will help flush out the last few frames
-  /// of delta or LDA features (it will typically affect the return value
-  /// of IsLastFrame).
-  virtual void InputFinished() = 0;
-};
-
-
-/// @}
-}  // namespace kaldi
-
-#endif  // KALDI_FEAT_ONLINE_FEATURE_ITF_H_
--- a/speechx/speechx/kaldi/feat/online-feature.cc
+++ b/speechx/speechx/kaldi/feat/online-feature.cc
@ -1,679 +0,0 @@
-// feat/online-feature.cc
-
-// Copyright    2013  Johns Hopkins University (author: Daniel Povey)
-//              2014  Yanqing Sun, Junjie Wang,
-//                    Daniel Povey, Korbinian Riedhammer
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#include "feat/online-feature.h"
-#include "transform/cmvn.h"
-
-namespace kaldi {
-
-// A request to hold 0 items is stored as -1; every other value is stored
-// unchanged.  Indexing starts at frame 0.
-RecyclingVector::RecyclingVector(int items_to_hold)
-    : items_to_hold_(items_to_hold != 0 ? items_to_hold : -1),
-      first_available_index_(0) {}
-
-// Frees every vector that is still held; the container owns its items.
-RecyclingVector::~RecyclingVector() {
-  for (auto it = items_.begin(); it != items_.end(); ++it) {
-    delete *it;
-  }
-}
-
-// Returns the vector stored for absolute frame "index".  Asking for a frame
-// that has already been recycled is reported through KALDI_ERR.
-Vector<BaseFloat> *RecyclingVector::At(int index) const {
-  const bool already_recycled = index < first_available_index_;
-  if (already_recycled) {
-    KALDI_ERR << "Attempted to retrieve feature vector that was "
-                 "already removed by the RecyclingVector (index = "
-              << index << "; "
-              << "first_available_index = " << first_available_index_ << "; "
-              << "size = " << Size() << ")";
-  }
-  // Translate the absolute index into a position inside items_;
-  // 'at' does size checking.
-  const int position = index - first_available_index_;
-  return items_.at(position);
-}
-
-// Appends "item" and takes ownership of it.  When the container already
-// holds items_to_hold_ vectors, the oldest one is deleted first and the
-// first available index moves forward by one.
-void RecyclingVector::PushBack(Vector<BaseFloat> *item) {
-  // The conversion to an unsigned type is made explicit here; it is the same
-  // conversion the mixed signed/unsigned comparison performs implicitly.
-  const bool at_capacity =
-      items_.size() == static_cast<size_t>(items_to_hold_);
-  if (at_capacity) {
-    Vector<BaseFloat> *oldest = items_.front();
-    delete oldest;
-    items_.pop_front();
-    ++first_available_index_;
-  }
-  items_.push_back(item);
-}
-
-// Total number of items ever pushed: the recycled ones plus those still held.
-int RecyclingVector::Size() const {
-  const size_t held = items_.size();
-  return first_available_index_ + held;
-}
-
-// Copies the stored feature vector of "frame" into *feat.
-template <class C>
-void OnlineGenericBaseFeature<C>::GetFrame(int32 frame,
-                                           VectorBase<BaseFloat> *feat) {
-  // features_.At() reports an error for frames that were already recycled.
-  feat->CopyFromVec(*(features_.At(frame)));
-}
-
-// Builds the feature computer and the window function from "opts" and sizes
-// the recycling feature store from opts.frame_opts.max_feature_vectors.
-template <class C>
-OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature(
-    const typename C::Options &opts)
-    : computer_(opts),
-      window_function_(computer_.GetFrameOptions()),
-      features_(opts.frame_opts.max_feature_vectors),
-      input_finished_(false),
-      waveform_offset_(0) {
-  // Regarding this assert: search for ONLINE_IVECTOR_LIMIT in
-  // online-ivector-feature.cc.
-  // The cast to the unsigned uint32 makes -1 compare as `very large`, so the
-  // "no limit" setting passes the check.
-  KALDI_ASSERT(static_cast<uint32>(opts.frame_opts.max_feature_vectors) > 200);
-}
-
-
-// Creates resampler_ on first use when the incoming sampling rate differs
-// from the configured samp_freq and the matching option allows it.
-//  - input rate higher than expected: downsampling, needs allow_downsample;
-//  - input rate lower than expected:  upsampling, needs allow_upsample.
-// The two options used to be paired with the opposite comparisons, so each
-// flag enabled the other direction.
-// A mismatch that is not allowed is reported through KALDI_ERR.  Once a
-// resampler exists, later calls only check that the rates have not changed.
-template <class C>
-void OnlineGenericBaseFeature<C>::MaybeCreateResampler(
-    BaseFloat sampling_rate) {
-  BaseFloat expected_sampling_rate = computer_.GetFrameOptions().samp_freq;
-
-  if (resampler_ != nullptr) {
-    KALDI_ASSERT(resampler_->GetInputSamplingRate() == sampling_rate);
-    KALDI_ASSERT(resampler_->GetOutputSamplingRate() == expected_sampling_rate);
-  } else if (((sampling_rate > expected_sampling_rate) &&
-              computer_.GetFrameOptions().allow_downsample) ||
-             ((sampling_rate < expected_sampling_rate) &&
-              computer_.GetFrameOptions().allow_upsample)) {
-    // The third argument is half of the lower of the two sampling rates.
-    resampler_.reset(new LinearResample(
-        sampling_rate, expected_sampling_rate,
-        std::min(sampling_rate / 2, expected_sampling_rate / 2), 6));
-  } else if (sampling_rate != expected_sampling_rate) {
-    KALDI_ERR << "Sampling frequency mismatch, expected "
-              << expected_sampling_rate << ", got " << sampling_rate
-              << "\nPerhaps you want to use the options "
-                 "--allow_{upsample,downsample}";
-  }
-}
-
-// Marks the end of the input.  If a resampler is in use it is flushed first
-// and any samples it still produces are appended to waveform_remainder_;
-// then the remaining frames are computed.
-template <class C>
-void OnlineGenericBaseFeature<C>::InputFinished() {
-  if (resampler_ != nullptr) {
-    // Flushing the resampler_ object (telling it that the file has finished)
-    // may yield a few more samples.  This should rarely make any difference.
-    Vector<BaseFloat> appended_wave;  // empty input for the flush call
-    Vector<BaseFloat> flushed_wave;
-    resampler_->Resample(appended_wave, true, &flushed_wave);
-
-    const int32 num_flushed = flushed_wave.Dim();
-    if (num_flushed != 0) {
-      const int32 num_kept = waveform_remainder_.Dim();
-      appended_wave.Resize(num_kept + num_flushed);
-      if (num_kept != 0) {
-        appended_wave.Range(0, num_kept).CopyFromVec(waveform_remainder_);
-      }
-      appended_wave.Range(num_kept, num_flushed).CopyFromVec(flushed_wave);
-      waveform_remainder_.Swap(&appended_wave);
-    }
-  }
-  input_finished_ = true;
-  ComputeFeatures();
-}
-
-// Appends new audio to waveform_remainder_ (resampling it first when a
-// resampler is needed) and computes every frame that has become available.
-// Empty input is ignored; calling this after InputFinished() is an error.
-template <class C>
-void OnlineGenericBaseFeature<C>::AcceptWaveform(
-    BaseFloat sampling_rate, const VectorBase<BaseFloat> &original_waveform) {
-  if (original_waveform.Dim() == 0) {
-    return;  // Nothing to do.
-  }
-  if (input_finished_) {
-    KALDI_ERR << "AcceptWaveform called after InputFinished() was called.";
-  }
-
-  MaybeCreateResampler(sampling_rate);
-
-  // Use the caller's samples directly unless they have to be resampled.
-  Vector<BaseFloat> resampled_wave;
-  const VectorBase<BaseFloat> *waveform = &original_waveform;
-  if (resampler_ != nullptr) {
-    resampler_->Resample(original_waveform, false, &resampled_wave);
-    waveform = &resampled_wave;
-  }
-
-  // New buffer = samples kept from earlier calls followed by the new ones.
-  const int32 num_kept = waveform_remainder_.Dim();
-  const int32 num_new = waveform->Dim();
-  Vector<BaseFloat> appended_wave;
-  appended_wave.Resize(num_kept + num_new);
-  if (num_kept != 0) {
-    appended_wave.Range(0, num_kept).CopyFromVec(waveform_remainder_);
-  }
-  appended_wave.Range(num_kept, num_new).CopyFromVec(*waveform);
-  waveform_remainder_.Swap(&appended_wave);
-
-  ComputeFeatures();
-}
-
-// Computes every frame that can be produced from the samples received so far
-// (waveform_offset_ discarded samples plus waveform_remainder_), appends the
-// new feature vectors to features_, and then drops the samples that no
-// future frame needs.
-template <class C>
-void OnlineGenericBaseFeature<C>::ComputeFeatures() {
-  const FrameExtractionOptions &frame_opts = computer_.GetFrameOptions();
-  // Total number of samples seen since the start, including discarded ones.
-  int64 num_samples_total = waveform_offset_ + waveform_remainder_.Dim();
-  int32 num_frames_old = features_.Size(),
-      num_frames_new = NumFrames(num_samples_total, frame_opts,
-                                 input_finished_);
-  KALDI_ASSERT(num_frames_new >= num_frames_old);
-
-  Vector<BaseFloat> window;
-  bool need_raw_log_energy = computer_.NeedRawLogEnergy();
-  for (int32 frame = num_frames_old; frame < num_frames_new; frame++) {
-    BaseFloat raw_log_energy = 0.0;
-    // The raw log energy is requested only when the computer needs it.
-    ExtractWindow(waveform_offset_, waveform_remainder_, frame,
-                  frame_opts, window_function_, &window,
-                  need_raw_log_energy ? &raw_log_energy : NULL);
-    // Ownership of this vector passes to features_ in PushBack() below.
-    Vector<BaseFloat> *this_feature = new Vector<BaseFloat>(computer_.Dim(),
-                                                            kUndefined);
-    // note: this online feature-extraction code does not support VTLN.
-    BaseFloat vtln_warp = 1.0;
-    computer_.Compute(raw_log_energy, vtln_warp, &window, this_feature);
-    features_.PushBack(this_feature);
-  }
-  // OK, we will now discard any portion of the signal that will not be
-  // necessary to compute frames in the future.
-  int64 first_sample_of_next_frame = FirstSampleOfFrame(num_frames_new,
-                                                        frame_opts);
-  int32 samples_to_discard = first_sample_of_next_frame - waveform_offset_;
-  if (samples_to_discard > 0) {
-    // discard the leftmost part of the waveform that we no longer need.
-    int32 new_num_samples = waveform_remainder_.Dim() - samples_to_discard;
-    if (new_num_samples <= 0) {
-      // odd, but we'll try to handle it.
-      // Everything buffered is dropped; the offset advances by that amount.
-      waveform_offset_ += waveform_remainder_.Dim();
-      waveform_remainder_.Resize(0);
-    } else {
-      // Keep only the last new_num_samples samples of the buffer.
-      Vector<BaseFloat> new_remainder(new_num_samples);
-      new_remainder.CopyFromVec(waveform_remainder_.Range(samples_to_discard,
-                                                          new_num_samples));
-      waveform_offset_ += samples_to_discard;
-      waveform_remainder_.Swap(&new_remainder);
-    }
-  }
-}
-
-// instantiate the templates defined here for MFCC, PLP and filterbank classes.
-template class OnlineGenericBaseFeature<MfccComputer>;
-template class OnlineGenericBaseFeature<PlpComputer>;
-template class OnlineGenericBaseFeature<FbankComputer>;
-
-// Copy constructor: copies the speaker stats, the global stats and the
-// frozen state of "other".
-OnlineCmvnState::OnlineCmvnState(const OnlineCmvnState &other)
-    : speaker_cmvn_stats(other.speaker_cmvn_stats),
-      global_cmvn_stats(other.global_cmvn_stats),
-      frozen_state(other.frozen_state) {}
-
-// Writes the state to "os" as a sequence of tokens, each followed by the
-// matrix it labels, enclosed in <OnlineCmvnState> ... </OnlineCmvnState>.
-// Read() expects exactly this order.
-void OnlineCmvnState::Write(std::ostream &os, bool binary) const {
-  WriteToken(os, binary, "<OnlineCmvnState>");  // magic string.
-  WriteToken(os, binary, "<SpeakerCmvnStats>");
-  speaker_cmvn_stats.Write(os, binary);
-  WriteToken(os, binary, "<GlobalCmvnStats>");
-  global_cmvn_stats.Write(os, binary);
-  WriteToken(os, binary, "<FrozenState>");
-  frozen_state.Write(os, binary);
-  WriteToken(os, binary, "</OnlineCmvnState>");
-}
-
-// Reads the state from "is"; the tokens and matrices must appear in the
-// order in which Write() emits them.
-void OnlineCmvnState::Read(std::istream &is, bool binary) {
-  ExpectToken(is, binary, "<OnlineCmvnState>");  // magic string.
-  ExpectToken(is, binary, "<SpeakerCmvnStats>");
-  speaker_cmvn_stats.Read(is, binary);
-  ExpectToken(is, binary, "<GlobalCmvnStats>");
-  global_cmvn_stats.Read(is, binary);
-  ExpectToken(is, binary, "<FrozenState>");
-  frozen_state.Read(is, binary);
-  ExpectToken(is, binary, "</OnlineCmvnState>");
-}
-
-// Constructs the object on top of "src" with an initial CMVN state.  The
-// temporary buffers are sized from src->Dim(); the state is installed with
-// SetState() and the --skip-dims option is parsed into skip_dims_.
-OnlineCmvn::OnlineCmvn(const OnlineCmvnOptions &opts,
-                       const OnlineCmvnState &cmvn_state,
-                       OnlineFeatureInterface *src)
-    : opts_(opts),
-      temp_stats_(2, src->Dim() + 1),
-      temp_feats_(src->Dim()),
-      temp_feats_dbl_(src->Dim()),
-      src_(src) {
-  SetState(cmvn_state);
-  const bool parsed_ok =
-      SplitStringToIntegers(opts.skip_dims, ":", false, &skip_dims_);
-  if (!parsed_ok) {
-    KALDI_ERR << "Bad --skip-dims option (should be colon-separated list of "
-                 "integers)";
-  }
-}
-
-// Constructs the object on top of "src" without an initial CMVN state (no
-// SetState() call is made).  The temporary buffers are sized from
-// src->Dim() and the --skip-dims option is parsed into skip_dims_.
-OnlineCmvn::OnlineCmvn(const OnlineCmvnOptions &opts,
-                       OnlineFeatureInterface *src)
-    : opts_(opts),
-      temp_stats_(2, src->Dim() + 1),
-      temp_feats_(src->Dim()),
-      temp_feats_dbl_(src->Dim()),
-      src_(src) {
-  const bool parsed_ok =
-      SplitStringToIntegers(opts.skip_dims, ":", false, &skip_dims_);
-  if (!parsed_ok) {
-    KALDI_ERR << "Bad --skip-dims option (should be colon-separated list of "
-                 "integers)";
-  }
-}
-
-
-// Finds the cached statistics that are closest in time to "frame" without
-// being later than it.
-//   frame        - frame index, must be >= 0.
-//   cached_frame - receives the index of the frame whose stats were found,
-//                  or -1 when nothing has been cached yet.
-//   stats        - receives a copy of those stats (zeroed when nothing was
-//                  cached).
-// The ring buffer is searched first; if it has no usable entry, the stats
-// stored every opts_.modulus frames in cached_stats_modulo_ are used.
-void OnlineCmvn::GetMostRecentCachedFrame(int32 frame,
-                                          int32 *cached_frame,
-                                          MatrixBase<double> *stats) {
-  KALDI_ASSERT(frame >= 0);
-  InitRingBufferIfNeeded();
-  // The ring buffer stays empty when opts_.ring_buffer_size <= 0.  Skip the
-  // search in that case: "t % opts_.ring_buffer_size" would otherwise divide
-  // by zero and index an empty vector.
-  if (!cached_stats_ring_.empty()) {
-    // look for a cached frame on a previous frame as close as possible in time
-    // to "frame".  Return if we get one.
-    for (int32 t = frame; t >= 0 && t >= frame - opts_.ring_buffer_size; t--) {
-      if (t % opts_.modulus == 0) {
-        // if this frame should be cached in cached_stats_modulo_, then
-        // we'll look there, and we won't go back any further in time.
-        break;
-      }
-      int32 index = t % opts_.ring_buffer_size;
-      if (cached_stats_ring_[index].first == t) {
-        *cached_frame = t;
-        stats->CopyFromMat(cached_stats_ring_[index].second);
-        return;
-      }
-    }
-  }
-  int32 n = frame / opts_.modulus;
-  if (n >= cached_stats_modulo_.size()) {
-    if (cached_stats_modulo_.size() == 0) {
-      // Nothing cached at all: report "before the first frame".
-      *cached_frame = -1;
-      stats->SetZero();
-      return;
-    } else {
-      // Fall back to the most recent entry that exists.
-      n = static_cast<int32>(cached_stats_modulo_.size() - 1);
-    }
-  }
-  *cached_frame = n * opts_.modulus;
-  KALDI_ASSERT(cached_stats_modulo_[n] != NULL);
-  stats->CopyFromMat(*(cached_stats_modulo_[n]));
-}
-
-// Allocates the ring buffer used for caching stats, on first use only.
-// Nothing is allocated when opts_.ring_buffer_size is not positive.  Every
-// slot starts with frame index -1 and a 2 x (Dim() + 1) matrix.
-void OnlineCmvn::InitRingBufferIfNeeded() {
-  if (!cached_stats_ring_.empty() || opts_.ring_buffer_size <= 0) {
-    return;
-  }
-  Matrix<double> blank_stats(2, this->Dim() + 1);
-  const std::pair<int32, Matrix<double> > empty_slot(-1, blank_stats);
-  cached_stats_ring_.resize(opts_.ring_buffer_size, empty_slot);
-}
-
-// Stores the stats of "frame".  Frames that are a multiple of opts_.modulus
-// go into cached_stats_modulo_; all other frames go into the ring buffer,
-// when it exists.
-void OnlineCmvn::CacheFrame(int32 frame, const MatrixBase<double> &stats) {
-  KALDI_ASSERT(frame >= 0);
-
-  if (frame % opts_.modulus != 0) {
-    // store in the ring buffer.
-    InitRingBufferIfNeeded();
-    if (cached_stats_ring_.empty()) return;
-    const int32 slot = frame % cached_stats_ring_.size();
-    cached_stats_ring_[slot].first = frame;
-    cached_stats_ring_[slot].second.CopyFromMat(stats);
-    return;
-  }
-
-  // store in cached_stats_modulo_.
-  const int32 n = frame / opts_.modulus;
-  if (n < cached_stats_modulo_.size()) {
-    KALDI_WARN << "Did not expect to reach this part of code.";
-    // do what seems right, but we shouldn't get here.
-    cached_stats_modulo_[n]->CopyFromMat(stats);
-    return;
-  }
-  // This assert limits the order in which CacheFrame may be called.
-  // Fortunately the calling code always calls it in sequence, which it has
-  // to because a previous frame is needed to compute the current one.
-  KALDI_ASSERT(n == cached_stats_modulo_.size());
-  cached_stats_modulo_.push_back(new Matrix<double>(stats));
-}
-
-// Frees the matrices owned by cached_stats_modulo_.
-OnlineCmvn::~OnlineCmvn() {
-  for (auto *cached : cached_stats_modulo_) {
-    delete cached;
-  }
-  cached_stats_modulo_.clear();
-}
-
-// Computes the sliding-window CMVN stats for "frame" into *stats_out.
-// Starts from the closest cached stats and advances one frame at a time,
-// adding the new frame and removing the frame that leaves the window of
-// opts_.cmn_window frames.  Every intermediate result is cached.
-// In *stats_out, element (0, dim) holds the frame count.
-void OnlineCmvn::ComputeStatsForFrame(int32 frame,
-                                      MatrixBase<double> *stats_out) {
-  KALDI_ASSERT(frame >= 0 && frame < src_->NumFramesReady());
-
-  int32 dim = this->Dim(), cur_frame;
-  // cur_frame becomes -1 when nothing is cached, so the loop starts at 0.
-  GetMostRecentCachedFrame(frame, &cur_frame, stats_out);
-
-  // Reuse the member buffers to avoid reallocating for every call.
-  Vector<BaseFloat> &feats(temp_feats_);
-  Vector<double> &feats_dbl(temp_feats_dbl_);
-  while (cur_frame < frame) {
-    cur_frame++;
-    src_->GetFrame(cur_frame, &feats);
-    feats_dbl.CopyFromVec(feats);
-    stats_out->Row(0).Range(0, dim).AddVec(1.0, feats_dbl);
-    // Row 1 is only maintained when variance normalization is enabled.
-    if (opts_.normalize_variance)
-      stats_out->Row(1).Range(0, dim).AddVec2(1.0, feats_dbl);
-    (*stats_out)(0, dim) += 1.0;
-    // it's a sliding buffer; a frame at the back may be
-    // leaving the buffer so we have to subtract that.
-    int32 prev_frame = cur_frame - opts_.cmn_window;
-    if (prev_frame >= 0) {
-      // we need to subtract frame prev_frame from the stats.
-      src_->GetFrame(prev_frame, &feats);
-      feats_dbl.CopyFromVec(feats);
-      stats_out->Row(0).Range(0, dim).AddVec(-1.0, feats_dbl);
-      if (opts_.normalize_variance)
-        stats_out->Row(1).Range(0, dim).AddVec2(-1.0, feats_dbl);
-      (*stats_out)(0, dim) -= 1.0;
-    }
-    CacheFrame(cur_frame, (*stats_out));
-  }
-}
-
-
-// static
-// Tops up the window stats in *stats with speaker stats and then global
-// stats until the frame count reaches opts.cmn_window.
-//   speaker_stats - per-speaker stats; may have zero rows (then skipped).
-//   global_stats  - global stats; required whenever the count is still
-//                   below opts.cmn_window after the speaker stats.
-//   stats         - in/out; element (0, dim) holds the current frame count.
-// At most opts.speaker_frames frames are taken from the speaker stats and
-// at most opts.global_frames from the global stats.
-void OnlineCmvn::SmoothOnlineCmvnStats(const MatrixBase<double> &speaker_stats,
-                                       const MatrixBase<double> &global_stats,
-                                       const OnlineCmvnOptions &opts,
-                                       MatrixBase<double> *stats) {
-  if (speaker_stats.NumRows() == 2 && !opts.normalize_variance) {
-    // this is just for efficiency: don't operate on the variance if it's not
-    // needed.
-    // The recursive call sees one-row matrices, so this branch is not taken
-    // again.
-    int32 cols = speaker_stats.NumCols();  // dim + 1
-    SubMatrix<double> stats_temp(*stats, 0, 1, 0, cols);
-    SmoothOnlineCmvnStats(speaker_stats.RowRange(0, 1),
-                          global_stats.RowRange(0, 1),
-                          opts, &stats_temp);
-    return;
-  }
-  int32 dim = stats->NumCols() - 1;
-  double cur_count = (*stats)(0, dim);
-  // If count exceeded cmn_window it would be an error in how "window_stats"
-  // was accumulated.
-  KALDI_ASSERT(cur_count <= 1.001 * opts.cmn_window);
-  if (cur_count >= opts.cmn_window)
-    return;
-  if (speaker_stats.NumRows() != 0) {  // if we have speaker stats..
-    // Frames to borrow: limited by the missing count, by
-    // opts.speaker_frames and by what the speaker stats contain.
-    double count_from_speaker = opts.cmn_window - cur_count,
-        speaker_count = speaker_stats(0, dim);
-    if (count_from_speaker > opts.speaker_frames)
-      count_from_speaker = opts.speaker_frames;
-    if (count_from_speaker > speaker_count)
-      count_from_speaker = speaker_count;
-    if (count_from_speaker > 0.0)
-      stats->AddMat(count_from_speaker / speaker_count,
-                             speaker_stats);
-    cur_count = (*stats)(0, dim);
-  }
-  if (cur_count >= opts.cmn_window)
-    return;
-  if (global_stats.NumRows() != 0) {
-    // Frames to borrow: limited by the missing count and by
-    // opts.global_frames.
-    double count_from_global = opts.cmn_window - cur_count,
-        global_count = global_stats(0, dim);
-    KALDI_ASSERT(global_count > 0.0);
-    if (count_from_global > opts.global_frames)
-      count_from_global = opts.global_frames;
-    if (count_from_global > 0.0)
-      stats->AddMat(count_from_global / global_count,
-                    global_stats);
-  } else {
-    KALDI_ERR << "Global CMN stats are required";
-  }
-}
-
-// Fetches frame "frame" from the source into *feat and normalizes it in place
-// with either the frozen CMVN stats or freshly computed, smoothed ones.
-void OnlineCmvn::GetFrame(int32 frame,
-                          VectorBase<BaseFloat> *feat) {
-  src_->GetFrame(frame, feat);
-  const int32 dim = feat->Dim();
-  KALDI_ASSERT(dim == this->Dim());
-
-  Matrix<double> &stats(temp_stats_);
-  stats.Resize(2, dim + 1, kUndefined);  // No-op when the size already matches.
-  if (frozen_state_.NumRows() == 0) {
-    // Not frozen: compute the raw stats (this involves caching), then smooth.
-    this->ComputeStatsForFrame(frame, &stats);
-    SmoothOnlineCmvnStats(orig_state_.speaker_cmvn_stats,
-                          orig_state_.global_cmvn_stats,
-                          opts_,
-                          &stats);
-  } else {
-    // The CMVN state has been frozen; reuse it unchanged.
-    stats.CopyFromMat(frozen_state_);
-  }
-
-  if (!skip_dims_.empty())
-    FakeStatsForSomeDims(skip_dims_, &stats);
-
-  // ApplyCmvn (declared in ../transform/cmvn.h) operates on matrices, so view
-  // the feature vector as a matrix with 1 row, dim columns and stride dim.
-  SubMatrix<BaseFloat> feat_mat(feat->Data(), 1, dim, dim);
-  if (!opts_.normalize_mean) {
-    // Variance normalization without mean normalization is not supported.
-    KALDI_ASSERT(!opts_.normalize_variance);
-    return;
-  }
-  ApplyCmvn(stats, opts_.normalize_variance, &feat_mat);
-}
-
-// Computes the smoothed CMVN stats as of frame "cur_frame" and stores them in
-// frozen_state_, which GetFrame() uses from then on instead of recomputing.
-void OnlineCmvn::Freeze(int32 cur_frame) {
-  Matrix<double> smoothed(2, this->Dim() + 1);
-  // Raw sliding-window stats first ...
-  ComputeStatsForFrame(cur_frame, &smoothed);
-  // ... then padded with speaker/global stats.
-  SmoothOnlineCmvnStats(orig_state_.speaker_cmvn_stats,
-                        orig_state_.global_cmvn_stats,
-                        opts_,
-                        &smoothed);
-  frozen_state_ = smoothed;
-}
-
-// Writes to *state_out the state this object was initialized with, with the
-// stats of frames 0..cur_frame of this utterance added to the speaker stats,
-// plus any frozen state.
-void OnlineCmvn::GetState(int32 cur_frame,
-                          OnlineCmvnState *state_out) {
-  *state_out = this->orig_state_;
-
-  // Update state_out->speaker_cmvn_stats with this utterance's frames.
-  const int32 dim = this->Dim();
-  Matrix<double> &spk_stats = state_out->speaker_cmvn_stats;
-  if (spk_stats.NumRows() == 0)
-    spk_stats.Resize(2, dim + 1);
-  Vector<BaseFloat> frame_feat(dim);
-  Vector<double> frame_feat_dbl(dim);
-  for (int32 t = 0; t <= cur_frame; t++) {
-    src_->GetFrame(t, &frame_feat);
-    frame_feat_dbl.CopyFromVec(frame_feat);
-    spk_stats(0, dim) += 1.0;                                     // count
-    spk_stats.Row(0).Range(0, dim).AddVec(1.0, frame_feat_dbl);   // sum x
-    spk_stats.Row(1).Range(0, dim).AddVec2(1.0, frame_feat_dbl);  // sum x^2
-  }
-
-  // Carry over any frozen state (set if the user called Freeze()).
-  state_out->frozen_state = frozen_state_;
-}
-
-// Replaces the inter-utterance CMVN state.  Only legal while
-// cached_stats_modulo_ is still empty, i.e. before any data was processed.
-void OnlineCmvn::SetState(const OnlineCmvnState &cmvn_state) {
-  KALDI_ASSERT(cached_stats_modulo_.empty() &&
-               "You cannot call SetState() after processing data.");
-  // Adopt the frozen stats carried by the new state as well as the state
-  // itself.
-  frozen_state_ = cmvn_state.frozen_state;
-  orig_state_ = cmvn_state;
-}
-
-// Number of spliced frames available: all source frames once the source has
-// delivered its last frame, otherwise the source count minus the
-// right-context frames that are still missing (never below zero).
-int32 OnlineSpliceFrames::NumFramesReady() const {
-  const int32 src_frames = src_->NumFramesReady();
-  const bool src_finished =
-      src_frames > 0 && src_->IsLastFrame(src_frames - 1);
-  if (src_finished)
-    return src_frames;
-  return std::max<int32>(0, src_frames - right_context_);
-}
-
-// Writes into *feat the concatenation of source frames
-// frame - left_context_ .. frame + right_context_.  Indices that fall outside
-// the available source frames are clamped to the first / last one.
-void OnlineSpliceFrames::GetFrame(int32 frame, VectorBase<BaseFloat> *feat) {
-  KALDI_ASSERT(left_context_ >= 0 && right_context_ >= 0);
-  KALDI_ASSERT(frame >= 0 && frame < NumFramesReady());
-  const int32 dim_in = src_->Dim();
-  const int32 num_spliced = 1 + left_context_ + right_context_;
-  KALDI_ASSERT(feat->Dim() == dim_in * num_spliced);
-  const int32 last_src_frame = src_->NumFramesReady() - 1;
-  const int32 first_wanted = frame - left_context_;
-  // n is 0 for the left-most spliced frame and increases to the right.
-  for (int32 n = 0; n < num_spliced; n++) {
-    const int32 t_clamped =
-        std::min<int32>(std::max<int32>(first_wanted + n, 0), last_src_frame);
-    SubVector<BaseFloat> part(*feat, n * dim_in, dim_in);
-    src_->GetFrame(t_clamped, &part);
-  }
-}
-
-// Splits "transform" into a linear part and an offset.  A matrix with
-// src->Dim() columns is purely linear (zero offset); one extra column is
-// taken as the offset of an affine transform.  Any other width is an error.
-OnlineTransform::OnlineTransform(const MatrixBase<BaseFloat> &transform,
-                                 OnlineFeatureInterface *src):
-    src_(src) {
-  const int32 src_dim = src_->Dim();
-  switch (transform.NumCols() - src_dim) {
-    case 0:  // Linear transform
-      linear_term_ = transform;
-      offset_.Resize(transform.NumRows());  // Resize() will zero it.
-      break;
-    case 1:  // Affine transform
-      linear_term_ = transform.Range(0, transform.NumRows(), 0, src_dim);
-      offset_.Resize(transform.NumRows());
-      offset_.CopyColFromMat(transform, src_dim);
-      break;
-    default:
-      KALDI_ERR << "Dimension mismatch: source features have dimension "
-                << src_dim << " and LDA #cols is " << transform.NumCols();
-  }
-}
-
-// Computes *feat = offset_ + linear_term_ * (source frame "frame").
-void OnlineTransform::GetFrame(int32 frame, VectorBase<BaseFloat> *feat) {
-  const int32 input_dim = linear_term_.NumCols();
-  Vector<BaseFloat> src_feat(input_dim);
-  src_->GetFrame(frame, &src_feat);
-  // Start from the offset, then add the linear part on top of it.
-  feat->CopyFromVec(offset_);
-  feat->AddMatVec(1.0, linear_term_, kNoTrans, src_feat, 1.0);
-}
-
-// Batch version of GetFrame(): row i of *feats becomes
-// offset_ + linear_term_ * (source frame frames[i]).
-void OnlineTransform::GetFrames(
-    const std::vector<int32> &frames, MatrixBase<BaseFloat> *feats) {
-  const int32 num_frames = feats->NumRows();
-  KALDI_ASSERT(static_cast<int32>(frames.size()) == num_frames);
-  const int32 input_dim = linear_term_.NumCols();
-  Matrix<BaseFloat> src_feats(num_frames, input_dim, kUndefined);
-  src_->GetFrames(frames, &src_feats);
-  // Every row starts as the offset; the product src_feats * linear_term_^T is
-  // then added on top.
-  feats->CopyRowsFromVec(offset_);
-  feats->AddMatMat(1.0, src_feats, kNoTrans, linear_term_, kTrans, 1.0);
-}
-
-
-// Output dimension: the source dimension repeated once for the features
-// themselves and once per delta order.
-int32 OnlineDeltaFeature::Dim() const {
-  const int32 num_blocks = 1 + opts_.order;
-  return src_->Dim() * num_blocks;
-}
-
-// Number of frames that can be produced: all source frames once the source
-// has delivered its last frame, otherwise the source count minus the
-// right-hand context (order * window frames), never below zero.
-int32 OnlineDeltaFeature::NumFramesReady() const {
-  const int32 src_frames = src_->NumFramesReady();
-  // Frames needed on the left or (more relevant here) the right of a frame
-  // in order to produce its output.
-  const int32 context = opts_.order * opts_.window;
-  const bool src_finished =
-      src_frames > 0 && src_->IsLastFrame(src_frames - 1);
-  if (src_finished)
-    return src_frames;
-  return std::max<int32>(0, src_frames - context);
-}
-
-// Computes the delta-augmented features of "frame" into *feat.  The source
-// frames within the needed context are copied into a temporary matrix
-// (truncated at the boundaries of the available input) and handed to
-// delta_features_.
-void OnlineDeltaFeature::GetFrame(int32 frame,
-                                      VectorBase<BaseFloat> *feat) {
-  KALDI_ASSERT(frame >= 0 && frame < NumFramesReady());
-  KALDI_ASSERT(feat->Dim() == Dim());
-  const int32 context = opts_.order * opts_.window;
-  const int32 src_frames_ready = src_->NumFramesReady();
-  // Clamp the wanted range [frame - context, frame + context] to the frames
-  // the source can actually provide.
-  const int32 first_frame = std::max<int32>(0, frame - context);
-  const int32 last_frame =
-      std::min<int32>(frame + context, src_frames_ready - 1);
-  KALDI_ASSERT(last_frame >= first_frame);
-  const int32 src_dim = src_->Dim();
-  Matrix<BaseFloat> window_feats(last_frame + 1 - first_frame, src_dim);
-  for (int32 t = first_frame; t <= last_frame; t++) {
-    SubVector<BaseFloat> row(window_feats, t - first_frame);
-    src_->GetFrame(t, &row);
-  }
-  // Row index of "frame" inside window_feats.
-  const int32 frame_offset = frame - first_frame;
-  delta_features_.Process(window_feats, frame_offset, feat);
-}
-
-
-// Stores the (non-owned) source pointer and the options, and builds the
-// delta computer from the same options.
-OnlineDeltaFeature::OnlineDeltaFeature(const DeltaFeaturesOptions &opts,
-                                       OnlineFeatureInterface *src)
-    : src_(src),
-      opts_(opts),
-      delta_features_(opts) {
-}
-
-// Returns frame "frame" in *feat, fetching it from the source and storing a
-// heap-allocated copy in cache_ the first time it is requested.
-void OnlineCacheFeature::GetFrame(int32 frame, VectorBase<BaseFloat> *feat) {
-  KALDI_ASSERT(frame >= 0);
-  const size_t index = static_cast<size_t>(frame);
-  if (index >= cache_.size())
-    cache_.resize(frame + 1, NULL);
-  if (cache_[index] == NULL) {
-    // Cache miss: allocate the slot, then fill it from the source.
-    cache_[index] = new Vector<BaseFloat>(this->Dim());
-    // The following call will crash if frame "frame" is not ready.
-    src_->GetFrame(frame, cache_[index]);
-  }
-  feat->CopyFromVec(*(cache_[index]));
-}
-
-// Batch version of GetFrame(): row i of *feats receives frame frames[i].
-// Frames already in cache_ are copied from it; the others are fetched from
-// the source in a single GetFrames() call and added to the cache.
-//
-// Preconditions (asserted): *feats has exactly one row per entry of "frames",
-// and every frame index is non-negative.
-void OnlineCacheFeature::GetFrames(
-    const std::vector<int32> &frames, MatrixBase<BaseFloat> *feats) {
-  // Same size check as OnlineTransform::GetFrames().
-  KALDI_ASSERT(static_cast<int32>(frames.size()) == feats->NumRows());
-  int32 num_frames = frames.size();
-  // non_cached_frames will be the subset of 't' values in 'frames' which were
-  // not previously cached, which we therefore need to get from src_.
-  std::vector<int32> non_cached_frames;
-  // 'non_cached_indexes' stores the indexes 'i' into 'frames' corresponding to
-  // the corresponding frames in 'non_cached_frames'.
-  std::vector<int32> non_cached_indexes;
-  non_cached_frames.reserve(frames.size());
-  non_cached_indexes.reserve(frames.size());
-  for (int32 i = 0; i < num_frames; i++) {
-    int32 t = frames[i];
-    // Same precondition as GetFrame(): a negative index would wrap around in
-    // the size_t casts below and lead to an out-of-range write into cache_.
-    KALDI_ASSERT(t >= 0);
-    if (static_cast<size_t>(t) < cache_.size() && cache_[t] != NULL) {
-      feats->Row(i).CopyFromVec(*(cache_[t]));
-    } else {
-      non_cached_frames.push_back(t);
-      non_cached_indexes.push_back(i);
-    }
-  }
-  if (non_cached_frames.empty())
-    return;
-  int32 num_non_cached_frames = non_cached_frames.size(),
-      dim = this->Dim();
-  Matrix<BaseFloat> non_cached_feats(num_non_cached_frames, dim,
-                                     kUndefined);
-  src_->GetFrames(non_cached_frames, &non_cached_feats);
-  for (int32 i = 0; i < num_non_cached_frames; i++) {
-    int32 t = non_cached_frames[i];
-    if (static_cast<size_t>(t) < cache_.size() && cache_[t] != NULL) {
-      // We can reach this point due to repeat indexes in 'non_cached_frames'.
-      feats->Row(non_cached_indexes[i]).CopyFromVec(*(cache_[t]));
-    } else {
-      SubVector<BaseFloat> this_feat(non_cached_feats, i);
-      feats->Row(non_cached_indexes[i]).CopyFromVec(this_feat);
-      if (static_cast<size_t>(t) >= cache_.size())
-        cache_.resize(t + 1, NULL);
-      // The cache keeps its own heap copy of the frame.
-      cache_[t] = new Vector<BaseFloat>(this_feat);
-    }
-  }
-}
-
-
-// Frees every cached feature vector and leaves cache_ empty.
-void OnlineCacheFeature::ClearCache() {
-  for (Vector<BaseFloat> *cached : cache_)
-    delete cached;  // deleting a NULL (never-filled) slot is a no-op
-  cache_.clear();
-}
-
-
-// Writes the frame "frame" of both sources into *feat back to back: the first
-// src1_->Dim() elements come from src1_, the following src2_->Dim() elements
-// from src2_.
-void OnlineAppendFeature::GetFrame(int32 frame, VectorBase<BaseFloat> *feat) {
-  KALDI_ASSERT(feat->Dim() == Dim());
-
-  const int32 dim1 = src1_->Dim(),
-      dim2 = src2_->Dim();
-  // Sub-vector views into *feat, so each source writes straight into its own
-  // slice without an intermediate copy.
-  SubVector<BaseFloat> feat1(*feat, 0, dim1);
-  SubVector<BaseFloat> feat2(*feat, dim1, dim2);
-  src1_->GetFrame(frame, &feat1);
-  src2_->GetFrame(frame, &feat2);
-}
-
-
-}  // namespace kaldi
--- a/speechx/speechx/kaldi/feat/online-feature.h
+++ b/speechx/speechx/kaldi/feat/online-feature.h
@ -1,632 +0,0 @@
-// feat/online-feature.h
-
-// Copyright 2013   Johns Hopkins University (author: Daniel Povey)
-//           2014   Yanqing Sun, Junjie Wang,
-//                  Daniel Povey, Korbinian Riedhammer
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_FEAT_ONLINE_FEATURE_H_
-#define KALDI_FEAT_ONLINE_FEATURE_H_
-
-#include <string>
-#include <vector>
-#include <deque>
-
-#include "matrix/matrix-lib.h"
-#include "util/common-utils.h"
-#include "base/kaldi-error.h"
-#include "feat/feature-functions.h"
-#include "feat/feature-mfcc.h"
-#include "feat/feature-plp.h"
-#include "feat/feature-fbank.h"
-#include "feat/online-feature-itf.h"
-
-namespace kaldi {
-/// @addtogroup  onlinefeat OnlineFeatureExtraction
-/// @{
-
-
-/// Storage for feature vectors that can optionally cap its memory usage by
-/// discarding the oldest elements.  The indices of discarded frames are
-/// "remembered": regardless of the items_to_hold setting, the user always
-/// addresses elements as if nothing had ever been removed.
-/// This is useful when processing very long recordings, where keeping every
-/// feature vector would eventually exhaust memory.
-///
-/// The collection owns the vectors handed to PushBack(), so it must not be
-/// copied: a copy would hold the same raw pointers and both objects would
-/// try to free them.  Copying is therefore disabled.
-class RecyclingVector {
-public:
-  /// By default (items_to_hold == -1) it does not remove any elements.
-  RecyclingVector(int items_to_hold = -1);
-
-  /// Non-copyable, because the stored raw pointers are owned by this object.
-  RecyclingVector(const RecyclingVector &) = delete;
-  RecyclingVector &operator=(const RecyclingVector &) = delete;
-
-  /// Returns the item at "index" (numbered as if no recycling had happened).
-  /// Ownership is retained by this collection - do not delete the item.
-  Vector<BaseFloat> *At(int index) const;
-
-  /// Appends "item".  Ownership of the item passes to this collection - do
-  /// not delete the item.
-  void PushBack(Vector<BaseFloat> *item);
-
-  /// Returns the size as if no "recycling" had happened, i.e. the number of
-  /// times PushBack() has been called.
-  int Size() const;
-
-  ~RecyclingVector();
-
-private:
-  std::deque<Vector<BaseFloat>*> items_;  // the items currently held
-  int items_to_hold_;          // value passed to the constructor
-  int first_available_index_;  // NOTE(review): presumably the user-visible
-                               // index of items_.front() - confirm in the .cc
-};
-
-
-/// Templated class for online feature extraction.  The template argument C
-/// is a class like MfccComputer or PlpComputer that does the basic
-/// per-frame feature extraction; see the OnlineMfcc / OnlinePlp / OnlineFbank
-/// typedefs below this class.
-template<class C>
-class OnlineGenericBaseFeature: public OnlineBaseFeature {
- public:
-  //
-  // First, functions that are present in the interface:
-  //
-
-  // Feature dimension, as reported by the computer.
-  virtual int32 Dim() const { return computer_.Dim(); }
-
-  // Note: IsLastFrame() will only ever return true if you have called
-  // InputFinished() (and this frame is the last frame).
-  virtual bool IsLastFrame(int32 frame) const {
-    return input_finished_ && frame == NumFramesReady() - 1;
-  }
-  // Frame shift converted from the milliseconds stored in the frame options.
-  virtual BaseFloat FrameShiftInSeconds() const {
-    return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
-  }
-
-  // Number of feature frames computed so far (including recycled ones, see
-  // RecyclingVector::Size()).
-  virtual int32 NumFramesReady() const { return features_.Size(); }
-
-  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
-
-  // Next, functions that are not in the interface.
-
-
-  // Constructor from options class
-  explicit OnlineGenericBaseFeature(const typename C::Options &opts);
-
-  // This would be called from the application, when you get
-  // more wave data.  Note: the sampling_rate is only provided so
-  // the code can assert that it matches the sampling rate
-  // expected in the options.
-  virtual void AcceptWaveform(BaseFloat sampling_rate,
-                              const VectorBase<BaseFloat> &waveform);
-
-
-  // InputFinished() tells the class you won't be providing any
-  // more waveform.  This will help flush out the last frame or two
-  // of features, in the case where snip-edges == false; it also
-  // affects the return value of IsLastFrame().
-  virtual void InputFinished();
-
- private:
-  // This function computes any additional feature frames that it is possible to
-  // compute from 'waveform_remainder_', which at this point may contain more
-  // than just a remainder-sized quantity (because AcceptWaveform() appends to
-  // waveform_remainder_ before calling this function).  It adds these feature
-  // frames to features_, and shifts off any now-unneeded samples of input from
-  // waveform_remainder_ while incrementing waveform_offset_ by the same amount.
-  void ComputeFeatures();
-
-  // NOTE(review): presumably sets up resampler_ when sampling_rate differs
-  // from the rate expected by the options - the definition is not in this
-  // header, confirm in the .cc file.
-  void MaybeCreateResampler(BaseFloat sampling_rate);
-
-  C computer_;  // class that does the MFCC or PLP or filterbank computation
-
-  // resampler in cases when the input sampling frequency is not equal to
-  // the expected sampling rate
-  std::unique_ptr<LinearResample> resampler_;
-
-  FeatureWindowFunction window_function_;
-
-  // features_ is the Mfcc or Plp or Fbank features that we have already computed.
-
-  RecyclingVector features_;
-
-  // True if the user has called "InputFinished()"
-  bool input_finished_;
-
-  // The sampling frequency, extracted from the config.  Should
-  // be identical to the waveform supplied.
-  BaseFloat sampling_frequency_;
-
-  // waveform_offset_ is the number of samples of waveform that we have
-  // already discarded, i.e. that were prior to 'waveform_remainder_'.
-  int64 waveform_offset_;
-
-  // waveform_remainder_ is a short piece of waveform that we may need to keep
-  // after extracting all the whole frames we can (whatever length of feature
-  // will be required for the next phase of computation).
-  Vector<BaseFloat> waveform_remainder_;
-};
-
-typedef OnlineGenericBaseFeature<MfccComputer> OnlineMfcc;
-typedef OnlineGenericBaseFeature<PlpComputer> OnlinePlp;
-typedef OnlineGenericBaseFeature<FbankComputer> OnlineFbank;
-
-
-/// Wraps an existing Matrix<BaseFloat> as an OnlineFeatureInterface, one
-/// matrix row per frame.  Useful when an earlier stage of feature processing
-/// was done offline but part of the online pipeline should still be used.
-class OnlineMatrixFeature: public OnlineFeatureInterface {
- public:
-  /// Caution: only a const reference to "mat" is stored, so the matrix must
-  /// stay alive for as long as this object exists.
-  explicit OnlineMatrixFeature(const MatrixBase<BaseFloat> &mat): mat_(mat) { }
-
-  /// Feature dimension == number of matrix columns.
-  virtual int32 Dim() const { return mat_.NumCols(); }
-
-  /// Always reports a fixed shift of 0.01 seconds.
-  virtual BaseFloat FrameShiftInSeconds() const { return 0.01f; }
-
-  /// All rows of the matrix are available from the start.
-  virtual int32 NumFramesReady() const { return mat_.NumRows(); }
-
-  /// Copies row "frame" of the matrix into *feat.
-  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat) {
-    feat->CopyFromVec(mat_.Row(frame));
-  }
-
-  /// True only for the final row of the matrix.
-  virtual bool IsLastFrame(int32 frame) const {
-    const int32 last_row = mat_.NumRows() - 1;
-    return frame == last_row;
-  }
-
- private:
-  const MatrixBase<BaseFloat> &mat_;  // not owned
-};
-
-
-// Options for OnlineCmvn.  Note the similarity with SlidingWindowCmnOptions,
-// but there are also differences.  One which doesn't appear in the config
-// itself, because it's a difference between the setups, is that in
-// OnlineCmvn we carry over data from the previous utterance(s) of the
-// speaker (up to "speaker_frames" frames) and, while the total is still
-// below "cmn_window", pad with up to "global_frames" frames from the
-// global stats.
-struct OnlineCmvnOptions {
-  int32 cmn_window;        // sliding-window length, in frames
-  int32 speaker_frames;    // must be <= cmn_window
-  int32 global_frames;     // must be <= speaker_frames.
-  bool normalize_mean;     // Must be true if normalize_variance==true.
-  bool normalize_variance;
-
-  // Not configurable from the command line; relates to how the class
-  // computes the cmvn internally.  Smaller -> more time-efficient but less
-  // memory-efficient.  Must be >= 1.
-  int32 modulus;
-  // Not configurable from the command line; size of the ring buffer used for
-  // caching CMVN stats.  Must be >= modulus.
-  int32 ring_buffer_size;
-  // Colon-separated list of dimensions to skip normalization of,
-  // e.g. 13:14:15.
-  std::string skip_dims;
-
-  OnlineCmvnOptions()
-      : cmn_window(600),
-        speaker_frames(600),
-        global_frames(200),
-        normalize_mean(true),
-        normalize_variance(false),
-        modulus(20),
-        ring_buffer_size(20),
-        skip_dims("") {
-  }
-
-  // Asserts the frame-count ordering documented on the members above and
-  // that modulus is positive.
-  void Check() const {
-    KALDI_ASSERT(speaker_frames <= cmn_window && global_frames <= speaker_frames
-                 && modulus > 0);
-  }
-
-  // Registers the command-line configurable options with "po".
-  void Register(ParseOptions *po) {
-    po->Register("cmn-window", &cmn_window,
-                 "Number of frames of sliding "
-                 "context for cepstral mean normalization.");
-    po->Register("global-frames", &global_frames,
-                 "Number of frames of "
-                 "global-average cepstral mean normalization stats to use for "
-                 "first utterance of a speaker");
-    po->Register("speaker-frames", &speaker_frames,
-                 "Number of frames of "
-                 "previous utterance(s) from this speaker to use in cepstral "
-                 "mean normalization");
-    // The option is called "norm-vars" for compatibility with
-    // ../featbin/apply-cmvn.cc
-    po->Register("norm-vars", &normalize_variance,
-                 "If true, do "
-                 "cepstral variance normalization in addition to cepstral mean "
-                 "normalization ");
-    po->Register("norm-means", &normalize_mean,
-                 "If true, do mean normalization "
-                 "(note: you cannot normalize the variance but not the mean)");
-    po->Register("skip-dims", &skip_dims,
-                 "Dimensions to skip normalization of "
-                 "(colon-separated list of integers)");
-  }
-};
-
-
-
-/** Struct OnlineCmvnState stores the state of CMVN adaptation between
-    utterances (but not the state of the computation within an utterance).  It
-    stores the global CMVN stats and the stats of the current speaker (if we
-    have seen previous utterances for this speaker), and possibly will have a
-    member "frozen_state": if the user has called the function Freeze() of class
-    OnlineCmvn, to fix the CMVN so we can estimate fMLLR on top of the fixed
-    value of cmvn.  If nonempty, "frozen_state" will reflect how we were
-    normalizing the mean and (if applicable) variance at the time when that
-    function was called.
-*/
-struct OnlineCmvnState {
-  // The following is the total CMVN stats for this speaker (up till now), in
-  // the same 2 x (dim+1) format as global_cmvn_stats below.  Empty if no
-  // speaker stats are available.
-  Matrix<double> speaker_cmvn_stats;
-
-  // The following is the global CMVN stats, in the usual
-  // format, of dimension 2 x (dim+1), as [  sum-stats          count
-  //                                       sum-squared-stats   0    ]
-  Matrix<double> global_cmvn_stats;
-
-  // If nonempty, contains CMVN stats representing the "frozen" state
-  // of CMVN that reflects how we were normalizing the data when the
-  // user called the Freeze() function in class OnlineCmvn.
-  Matrix<double> frozen_state;
-
-  // Creates a state in which all three matrices are empty.
-  OnlineCmvnState() { }
-
-  // Creates a state holding only global stats (a copy of "global_stats").
-  explicit OnlineCmvnState(const Matrix<double> &global_stats):
-      global_cmvn_stats(global_stats) { }
-
-  // Copy constructor
-  OnlineCmvnState(const OnlineCmvnState &other);
-
-  // Serialization of the state to / from a stream, in binary or text mode.
-  void Write(std::ostream &os, bool binary) const;
-  void Read(std::istream &is, bool binary);
-
-  // Use the default assignment operator.
-};
-
-/**
-   This class does an online version of the cepstral mean and [optionally]
-   variance, but note that this is not equivalent to the offline version.  This
-   is necessarily so, as the offline computation involves looking into the
-   future.  If you plan to use features normalized with this type of CMVN then
-   you need to train in a `matched' way, i.e. with the same type of features.
-   We normally only do so in the "online" GMM-based decoding, e.g.  in
-   online2bin/online2-wav-gmm-latgen-faster.cc; see also the script
-   steps/online/prepare_online_decoding.sh and steps/online/decode.sh.
-
-   In the steady state (in the middle of a long utterance), this class
-   accumulates CMVN statistics from the previous "cmn_window" frames (default 600
-   frames, or 6 seconds), and uses these to normalize the mean and possibly
-   variance of the current frame.
-
-   The config variables "speaker_frames" and "global_frames" relate to what
-   happens at the beginning of the utterance when we have seen fewer than
-   "cmn_window" frames of context, and so might not have very good stats to
-   normalize with.  Basically, we first augment any existing stats with up
-   to "speaker_frames" frames of stats from previous utterances of the current
-   speaker, and if this doesn't take us up to the required "cmn_window" frame
-   count, we further augment with up to "global_frames" frames of global
-   stats.  The global stats are CMVN stats accumulated from training or testing
-   data, that give us a reasonable source of mean and variance for "typical"
-   data.
- */
-class OnlineCmvn: public OnlineFeatureInterface {
- public:
-
-  //
-  // First, functions that are present in the interface:
-  //
-  virtual int32 Dim() const { return src_->Dim(); }
-
-  virtual bool IsLastFrame(int32 frame) const {
-    return src_->IsLastFrame(frame);
-  }
-  virtual BaseFloat FrameShiftInSeconds() const {
-    return src_->FrameShiftInSeconds();
-  }
-
-  // The online cmvn does not introduce any additional latency.
-  virtual int32 NumFramesReady() const { return src_->NumFramesReady(); }
-
-  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
-
-  //
-  // Next, functions that are not in the interface.
-  //
-
-  /// Initializer that sets the cmvn state.  If you don't have previous
-  /// utterances from the same speaker you are supposed to initialize the CMVN
-  /// state from some global CMVN stats, which you can get from summing all cmvn
-  /// stats you have in your training data using "sum-matrix".  This just gives
-  /// it a reasonable starting point at the start of the file.
-  /// If you do have previous utterances from the same speaker or at least a
-  /// similar environment, you are supposed to initialize it by calling GetState
-  /// from the previous utterance
-  OnlineCmvn(const OnlineCmvnOptions &opts,
-             const OnlineCmvnState &cmvn_state,
-             OnlineFeatureInterface *src);
-
-  /// Initializer that does not set the cmvn state:
-  /// after calling this, you should call SetState().
-  OnlineCmvn(const OnlineCmvnOptions &opts,
-             OnlineFeatureInterface *src);
-
-  // Outputs any state information from this utterance to "cmvn_state".
-  // The value of "cmvn_state" before the call does not matter: the output
-  // depends on the value of OnlineCmvnState the class was initialized
-  // with, the input feature values up to cur_frame, and the effects
-  // of the user possibly having called Freeze().
-  // If cur_frame is -1, no frames of this utterance are added to the
-  // state that was supplied to this object.
-  void GetState(int32 cur_frame,
-                OnlineCmvnState *cmvn_state);
-
-  // This function can be used to modify the state of the CMVN computation
-  // from outside, but must only be called before you have processed any data
-  // (otherwise it will crash).  This "state" is really just the information
-  // that is propagated between utterances, not the state of the computation
-  // inside an utterance.
-  void SetState(const OnlineCmvnState &cmvn_state);
-
-  // From this point it will freeze the CMN to what it would have been if
-  // measured at frame "cur_frame", and it will stop it from changing
-  // further. This also applies retroactively for this utterance, so if you
-  // call GetFrame() on previous frames, it will use the CMVN stats
-  // from cur_frame; and it applies in the future too if you then
-  // call GetState() and use this state to initialize the next
-  // utterance's CMVN object.
-  void Freeze(int32 cur_frame);
-
-  virtual ~OnlineCmvn();
- private:
-
-  /// Smooth the CMVN stats "stats" (which are stored in the normal format as a
-  /// 2 x (dim+1) matrix), by possibly adding some stats from "global_stats"
-  /// and/or "speaker_stats", controlled by the config.  The best way to
-  /// understand the smoothing rule we use is just to look at the code.
-  static void SmoothOnlineCmvnStats(const MatrixBase<double> &speaker_stats,
-                                    const MatrixBase<double> &global_stats,
-                                    const OnlineCmvnOptions &opts,
-                                    MatrixBase<double> *stats);
-
-  /// Get the most recent cached frame of CMVN stats.  [If no frames
-  /// were cached, sets up empty stats for frame zero and returns that].
-  void GetMostRecentCachedFrame(int32 frame,
-                                int32 *cached_frame,
-                                MatrixBase<double> *stats);
-
-  /// Cache this frame of stats.
-  void CacheFrame(int32 frame, const MatrixBase<double> &stats);
-
-  /// Initialize ring buffer for caching stats.
-  inline void InitRingBufferIfNeeded();
-
-  /// Computes the raw CMVN stats for this frame, making use of (and updating if
-  /// necessary) the cached statistics (see cached_stats_modulo_ and
-  /// cached_stats_ring_ below).  This means the (x, x^2, count) stats for the
-  /// last up to opts_.cmn_window frames.
-  void ComputeStatsForFrame(int32 frame,
-                            MatrixBase<double> *stats);
-
-
-  OnlineCmvnOptions opts_;
-  std::vector<int32> skip_dims_; // Skip CMVN for these dimensions.  Derived from opts_.
-  OnlineCmvnState orig_state_;   // reflects the state before we saw this
-                                 // utterance.
-  Matrix<double> frozen_state_;  // If the user called Freeze(), this variable
-                                 // will reflect the CMVN state that we froze
-                                 // at.
-
-  // The variable below reflects the raw (count, x, x^2) statistics of the
-  // input, computed every opts_.modulus frames.
-  // cached_stats_modulo_[n / opts_.modulus] contains the (count, x, x^2)
-  // statistics for the frames from std::max(0, n - opts_.cmn_window)
-  // through n.  The matrices are owned here and freed in the destructor.
-  std::vector<Matrix<double>*> cached_stats_modulo_;
-  // the variable below is a ring-buffer of cached stats.  the int32 is the
-  // frame index.
-  std::vector<std::pair<int32, Matrix<double> > > cached_stats_ring_;
-
-  // Some temporary variables used inside functions of this class, which are
-  // put here to avoid reallocation.
-  Matrix<double> temp_stats_;
-  Vector<BaseFloat> temp_feats_;
-  Vector<double> temp_feats_dbl_;
-
-  OnlineFeatureInterface *src_;  // Not owned here
-};
-
-
-// Options for OnlineSpliceFrames: how many frames of context to splice in on
-// each side of the current frame (4 and 4 by default).
-struct OnlineSpliceOptions {
-  int32 left_context;
-  int32 right_context;
-
-  OnlineSpliceOptions()
-      : left_context(4),
-        right_context(4) {
-  }
-
-  // Registers both options with "po".
-  void Register(ParseOptions *po) {
-    po->Register("left-context", &left_context,
-                 "Left-context for frame "
-                 "splicing prior to LDA");
-    po->Register("right-context", &right_context,
-                 "Right-context for frame "
-                 "splicing prior to LDA");
-  }
-};
-
-/// Online feature that splices each source frame together with
-/// left_context_ frames before it and right_context_ frames after it.
-class OnlineSpliceFrames: public OnlineFeatureInterface {
- public:
-  //
-  // Functions from the OnlineFeatureInterface:
-  //
-
-  /// Source dimension times the number of spliced frames.
-  virtual int32 Dim() const {
-    const int32 num_spliced = 1 + left_context_ + right_context_;
-    return src_->Dim() * num_spliced;
-  }
-
-  /// Forwarded to the source.
-  virtual bool IsLastFrame(int32 frame) const {
-    return src_->IsLastFrame(frame);
-  }
-
-  /// Forwarded to the source.
-  virtual BaseFloat FrameShiftInSeconds() const {
-    return src_->FrameShiftInSeconds();
-  }
-
-  virtual int32 NumFramesReady() const;
-
-  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
-
-  //
-  // Functions that are not in the interface:
-  //
-
-  /// Takes the context sizes from "opts"; "src" is stored but not owned.
-  OnlineSpliceFrames(const OnlineSpliceOptions &opts,
-                     OnlineFeatureInterface *src)
-      : left_context_(opts.left_context),
-        right_context_(opts.right_context),
-        src_(src) {
-  }
-
- private:
-  int32 left_context_;
-  int32 right_context_;
-  OnlineFeatureInterface *src_;  // Not owned here
-};
-
-/// This online-feature class implements any affine or linear transform.
-class OnlineTransform: public OnlineFeatureInterface {
- public:
-  //
-  // First, functions that are present in the interface:
-  //
-  virtual int32 Dim() const { return offset_.Dim(); }  // size of offset_
-
-  virtual bool IsLastFrame(int32 frame) const {  // forwarded to src_ as-is
-    return src_->IsLastFrame(frame);
-  }
-  virtual BaseFloat FrameShiftInSeconds() const {  // forwarded to src_ as-is
-    return src_->FrameShiftInSeconds();
-  }
-
-  virtual int32 NumFramesReady() const { return src_->NumFramesReady(); }
-
-  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
-
-  // Multi-frame variant: takes a list of frame indices and a matrix output.
-  virtual void GetFrames(const std::vector<int32> &frames,
-                         MatrixBase<BaseFloat> *feats);
-
-  //
-  // Next, functions that are not in the interface.
-  //
-
-  /// The transform can be a linear transform, or an affine transform
-  /// where the last column is the offset.
-  OnlineTransform(const MatrixBase<BaseFloat> &transform,
-                  OnlineFeatureInterface *src);
-
-
- private:
-  OnlineFeatureInterface *src_;  // Not owned here
-  Matrix<BaseFloat> linear_term_;  // presumably the linear part of `transform`; confirm in the .cc
-  Vector<BaseFloat> offset_;  // its Dim() is what this class reports as Dim()
-};
-
-class OnlineDeltaFeature: public OnlineFeatureInterface {
- public:
-  // Online feature built from a source feature and DeltaFeaturesOptions.
-  // First, functions that are present in the interface:
-  //
-  virtual int32 Dim() const;  // declared only; no body here
-
-  virtual bool IsLastFrame(int32 frame) const {  // forwarded to src_ as-is
-    return src_->IsLastFrame(frame);
-  }
-  virtual BaseFloat FrameShiftInSeconds() const {  // forwarded to src_ as-is
-    return src_->FrameShiftInSeconds();
-  }
-
-  virtual int32 NumFramesReady() const;  // declared only; no body here
-
-  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
-
-  //
-  // Next, functions that are not in the interface.
-  //
-  OnlineDeltaFeature(const DeltaFeaturesOptions &opts,
-                     OnlineFeatureInterface *src);
-
- private:
-  OnlineFeatureInterface *src_;  // Not owned here
-  DeltaFeaturesOptions opts_;  // presumably a copy of the constructor's opts; confirm in the .cc
-  DeltaFeatures delta_features_;  // This class contains just a few
-                                  // coefficients.
-};
-
-
-/// This feature type can be used to cache its input, to avoid
-/// repetition of computation in a multi-pass decoding context.
-class OnlineCacheFeature: public OnlineFeatureInterface {
- public:
-  virtual int32 Dim() const { return src_->Dim(); }  // same dim as the source
-
-  virtual bool IsLastFrame(int32 frame) const {  // forwarded to src_ as-is
-    return src_->IsLastFrame(frame);
-  }
-  virtual BaseFloat FrameShiftInSeconds() const {  // forwarded to src_ as-is
-    return src_->FrameShiftInSeconds();
-  }
-
-  virtual int32 NumFramesReady() const { return src_->NumFramesReady(); }
-
-  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
-
-  virtual void GetFrames(const std::vector<int32> &frames,
-                         MatrixBase<BaseFloat> *feats);
-
-  virtual ~OnlineCacheFeature() { ClearCache(); }  // cache is cleared on destruction
-
-  // Things that are not in the shared interface:
-
-  void ClearCache();  // this should be called if you change the underlying
-                      // features in some way.
-
-  explicit OnlineCacheFeature(OnlineFeatureInterface *src): src_(src) { }
- private:
-
-  OnlineFeatureInterface *src_;  // Not owned here
-  std::vector<Vector<BaseFloat>* > cache_;  // presumably one slot per cached frame; confirm in the .cc
-};
-
-
-
-
-/// This online-feature class implements combination of two feature
-/// streams (such as pitch, plp) into one stream.
-class OnlineAppendFeature: public OnlineFeatureInterface {
- public:
-  virtual int32 Dim() const { return src1_->Dim() + src2_->Dim(); }  // sum of both dims
-
-  virtual bool IsLastFrame(int32 frame) const {  // true if either source says so
-    return (src1_->IsLastFrame(frame) || src2_->IsLastFrame(frame));
-  }
-  // Hopefully sources have the same rate; only src1_ is consulted here.
-  virtual BaseFloat FrameShiftInSeconds() const {
-    return src1_->FrameShiftInSeconds();
-  }
-
-  virtual int32 NumFramesReady() const {  // limited by the slower source
-    return std::min(src1_->NumFramesReady(), src2_->NumFramesReady());
-  }
-
-  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
-
-  virtual ~OnlineAppendFeature() {  }  // empty: neither source is deleted here
-
-  OnlineAppendFeature(OnlineFeatureInterface *src1,
-      OnlineFeatureInterface *src2): src1_(src1), src2_(src2) { }
- private:
-
-  OnlineFeatureInterface *src1_;  // first stream; not deleted by this class
-  OnlineFeatureInterface *src2_;  // second stream; not deleted by this class
-};
-
-/// @} End of "addtogroup onlinefeat"
-}  // namespace kaldi
-
-#endif  // KALDI_FEAT_ONLINE_FEATURE_H_
--- a/speechx/speechx/kaldi/feat/pitch-functions.cc
+++ b/speechx/speechx/kaldi/feat/pitch-functions.cc
--- a/speechx/speechx/kaldi/feat/pitch-functions.h
+++ b/speechx/speechx/kaldi/feat/pitch-functions.h
@ -1,450 +0,0 @@
-// feat/pitch-functions.h
-
-// Copyright     2013  Pegah Ghahremani
-//               2014  IMSL, PKU-HKUST (author: Wei Shi)
-//               2014  Yanqing Sun, Junjie Wang,
-//                     Daniel Povey, Korbinian Riedhammer
-//                     Xin Lei
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_PITCH_FUNCTIONS_H_
-#define KALDI_FEAT_PITCH_FUNCTIONS_H_
-
-#include <cassert>
-#include <cstdlib>
-#include <string>
-#include <vector>
-
-#include "base/kaldi-error.h"
-#include "feat/mel-computations.h"
-#include "feat/online-feature-itf.h"
-#include "matrix/matrix-lib.h"
-#include "util/common-utils.h"
-
-namespace kaldi {
-/// @addtogroup  feat FeatureExtraction
-/// @{
-
-struct PitchExtractionOptions {  // Options taken by OnlinePitchFeature and ComputeKaldiPitch.
-  // FrameExtractionOptions frame_opts;
-  BaseFloat samp_freq;          // sample frequency in hertz
-  BaseFloat frame_shift_ms;     // in milliseconds.
-  BaseFloat frame_length_ms;    // in milliseconds.
-  BaseFloat preemph_coeff;      // Preemphasis coefficient. [use is deprecated.]
-  BaseFloat min_f0;             // min f0 to search (Hz)
-  BaseFloat max_f0;             // max f0 to search (Hz)
-  BaseFloat soft_min_f0;        // Minimum f0, applied in soft way, must not
-                                // exceed min-f0
-  BaseFloat penalty_factor;     // cost factor for F0 change
-  BaseFloat lowpass_cutoff;     // cutoff frequency for Low pass filter
-  BaseFloat resample_freq;      // Frequency that we down-sample the signal to;
-                                // must be more than twice lowpass_cutoff
-  BaseFloat delta_pitch;        // the pitch tolerance in pruning lags
-  BaseFloat nccf_ballast;       // Increasing this factor reduces NCCF for
-                                // quiet frames, helping ensure pitch
-                                // continuity in unvoiced region
-  int32 lowpass_filter_width;   // Integer that determines filter width of
-                                // lowpass filter
-  int32 upsample_filter_width;  // Integer that determines filter width when
-                                // upsampling NCCF
-
-  // Below are newer config variables, not present in the original paper,
-  // that relate to the online pitch extraction algorithm.
-
-  // The maximum number of frames of latency that we allow the pitch-processing
-  // to introduce, for online operation. If you set this to a large value,
-  // there would be no inaccuracy from the Viterbi traceback (but it might make
-  // you wait to see the pitch). This is not very relevant for the online
-  // operation: normalization-right-context is more relevant, you
-  // can just leave this value at zero.
-  int32 max_frames_latency;
-
-  // Only relevant for the function ComputeKaldiPitch which is called by
-  // compute-kaldi-pitch-feats. If nonzero, we provide the input as chunks of
-  // this size. This affects the energy normalization which has a small effect
-  // on the resulting features, especially at the beginning of a file. For best
-  // compatibility with online operation (e.g. if you plan to train models for
-  // the online-decoding setup), you might want to set this to a small value,
-  // like one frame.
-  int32 frames_per_chunk;
-
-  // Only relevant for the function ComputeKaldiPitch which is called by
-  // compute-kaldi-pitch-feats, and only relevant if frames_per_chunk is
-  // nonzero. If true, it will query the features as soon as they are
-  // available, which simulates the first-pass features you would get in online
-  // decoding. If false, the features you will get will be the same as those
-  // available at the end of the utterance, after InputFinished() has been
-  // called: e.g. during lattice rescoring.
-  bool simulate_first_pass_online;
-
-  // Only relevant for online operation or when emulating online operation
-  // (e.g. when setting frames_per_chunk). This is the frame-index on which we
-  // recompute the NCCF (e.g. frame-index 500 = after 5 seconds); if the
-  // segment ends before this we do it when the segment ends. We do this by
-  // re-computing the signal average energy, which affects the NCCF via the
-  // "ballast term", scaling the resampled NCCF by a factor derived from the
-  // average change in the "ballast term", and re-doing the backtrace
-  // computation. Making this infinity would be the most exact, but would
-  // introduce unwanted latency at the end of long utterances, for little
-  // benefit.
-  int32 recompute_frame;
-
-  // This is a "hidden config" used only for testing the online pitch
-  // extraction. If true, we compute the signal root-mean-squared for the
-  // ballast term, only up to the current frame, rather than the end of the
-  // current chunk of signal. This makes the output insensitive to the
-  // chunking, which is useful for testing purposes.
-  bool nccf_ballast_online;
-  bool snip_edges;  // see the "snip-edges" help text in Register(); default true
-  PitchExtractionOptions():
-      samp_freq(16000),
-      frame_shift_ms(10.0),
-      frame_length_ms(25.0),
-      preemph_coeff(0.0),
-      min_f0(50),
-      max_f0(400),
-      soft_min_f0(10.0),
-      penalty_factor(0.1),
-      lowpass_cutoff(1000),
-      resample_freq(4000),
-      delta_pitch(0.005),
-      nccf_ballast(7000),
-      lowpass_filter_width(1),
-      upsample_filter_width(5),
-      max_frames_latency(0),
-      frames_per_chunk(0),
-      simulate_first_pass_online(false),
-      recompute_frame(500),
-      nccf_ballast_online(false),
-      snip_edges(true) { }
-
-  void Register(OptionsItf *opts) {  // registers every field above as an option
-    opts->Register("sample-frequency", &samp_freq,
-                   "Waveform data sample frequency (must match the waveform "
-                   "file, if specified there)");
-    opts->Register("frame-length", &frame_length_ms, "Frame length in "
-                   "milliseconds");
-    opts->Register("frame-shift", &frame_shift_ms, "Frame shift in "
-                   "milliseconds");
-    opts->Register("preemphasis-coefficient", &preemph_coeff,
-                   "Coefficient for use in signal preemphasis (deprecated)");
-    opts->Register("min-f0", &min_f0,
-                   "min. F0 to search for (Hz)");
-    opts->Register("max-f0", &max_f0,
-                   "max. F0 to search for (Hz)");
-    opts->Register("soft-min-f0", &soft_min_f0,
-                   "Minimum f0, applied in soft way, must not exceed min-f0");
-    opts->Register("penalty-factor", &penalty_factor,
-                   "cost factor for FO change.");  // NOTE(review): "FO" is presumably "F0"
-    opts->Register("lowpass-cutoff", &lowpass_cutoff,
-                   "cutoff frequency for LowPass filter (Hz) ");
-    opts->Register("resample-frequency", &resample_freq,
-                   "Frequency that we down-sample the signal to.  Must be "
-                   "more than twice lowpass-cutoff");
-    opts->Register("delta-pitch", &delta_pitch,
-                   "Smallest relative change in pitch that our algorithm "
-                   "measures");
-    opts->Register("nccf-ballast", &nccf_ballast,
-                   "Increasing this factor reduces NCCF for quiet frames");
-    opts->Register("nccf-ballast-online", &nccf_ballast_online,
-                   "This is useful mainly for debug; it affects how the NCCF "
-                   "ballast is computed.");
-    opts->Register("lowpass-filter-width", &lowpass_filter_width,
-                   "Integer that determines filter width of "
-                   "lowpass filter, more gives sharper filter");
-    opts->Register("upsample-filter-width", &upsample_filter_width,
-                   "Integer that determines filter width when upsampling NCCF");
-    opts->Register("frames-per-chunk", &frames_per_chunk, "Only relevant for "
-                   "offline pitch extraction (e.g. compute-kaldi-pitch-feats), "
-                   "you can set it to a small nonzero value, such as 10, for "
-                   "better feature compatibility with online decoding (affects "
-                   "energy normalization in the algorithm)");
-    opts->Register("simulate-first-pass-online", &simulate_first_pass_online,
-                   "If true, compute-kaldi-pitch-feats will output features "
-                   "that correspond to what an online decoder would see in the "
-                   "first pass of decoding-- not the final version of the "
-                   "features, which is the default.  Relevant if "
-                   "--frames-per-chunk > 0");
-    opts->Register("recompute-frame", &recompute_frame, "Only relevant for "
-                   "online pitch extraction, or for compatibility with online "
-                   "pitch extraction.  A non-critical parameter; the frame at "
-                   "which we recompute some of the forward pointers, after "
-                   "revising our estimate of the signal energy.  Relevant if"  // NOTE(review): no trailing space, text reads "if--frames-per-chunk"
-                   "--frames-per-chunk > 0");
-    opts->Register("max-frames-latency", &max_frames_latency, "Maximum number "
-                   "of frames of latency that we allow pitch tracking to "
-                   "introduce into the feature processing (affects output only "
-                   "if --frames-per-chunk > 0 and "
-                   "--simulate-first-pass-online=true");  // NOTE(review): the "(" opened in this help text is never closed
-    opts->Register("snip-edges", &snip_edges, "If this is set to false, the "
-                   "incomplete frames near the ending edge won't be snipped, "
-                   "so that the number of frames is the file size divided by "
-                   "the frame-shift. This makes different types of features "
-                   "give the same number of frames.");
-  }
-  /// Returns the window-size in samples, after resampling.  This is the
-  /// "basic window size", not the full window size after extending by max-lag.
-  // Because of floating point representation, it is more reliable to divide
-  // by 1000 instead of multiplying by 0.001, but it is a bit slower.
-  int32 NccfWindowSize() const {  // the cast truncates toward zero
-    return static_cast<int32>(resample_freq * frame_length_ms / 1000.0);
-  }
-  /// Returns the window-shift in samples, after resampling.
-  int32 NccfWindowShift() const {  // the cast truncates toward zero
-    return static_cast<int32>(resample_freq * frame_shift_ms / 1000.0);
-  }
-};
-
-struct ProcessPitchOptions {  // Options taken by OnlineProcessPitch and ProcessPitch.
-  BaseFloat pitch_scale;  // the final normalized-log-pitch feature is scaled
-                          // with this value
-  BaseFloat pov_scale;    // the final POV feature is scaled with this value
-  BaseFloat pov_offset;   // An offset that can be added to the final POV
-                          // feature (useful for online-decoding, where we don't
-                          // do CMN to the pitch-derived features).
-
-  BaseFloat delta_pitch_scale;  // scale for the final delta log-pitch feature
-  BaseFloat delta_pitch_noise_stddev;  // stddev of noise we add to delta-pitch
-  int32 normalization_left_context;    // left-context used for sliding-window
-                                       // normalization
-  int32 normalization_right_context;   // this should be reduced in online
-                                       // decoding to reduce latency
-
-  int32 delta_window;  // frames on each side of the central frame for the delta window
-  int32 delay;         // frames by which the pitch information is delayed
-
-  bool add_pov_feature;           // add the warped NCCF to the output features
-  bool add_normalized_log_pitch;  // add log-pitch with POV-weighted mean subtraction
-  bool add_delta_pitch;           // add the time derivative of log-pitch
-  bool add_raw_log_pitch;         // add log(pitch); the only one off by default
-
-  ProcessPitchOptions() :
-      pitch_scale(2.0),
-      pov_scale(2.0),
-      pov_offset(0.0),
-      delta_pitch_scale(10.0),
-      delta_pitch_noise_stddev(0.005),
-      normalization_left_context(75),
-      normalization_right_context(75),
-      delta_window(2),
-      delay(0),
-      add_pov_feature(true),
-      add_normalized_log_pitch(true),
-      add_delta_pitch(true),
-      add_raw_log_pitch(false) { }
-
-  // NOTE(review): takes ParseOptions*, PitchExtractionOptions::Register takes OptionsItf*; confirm intended.
-  void Register(ParseOptions *opts) {
-    opts->Register("pitch-scale", &pitch_scale,
-                   "Scaling factor for the final normalized log-pitch value");
-    opts->Register("pov-scale", &pov_scale,
-                   "Scaling factor for final POV (probability of voicing) "
-                   "feature");
-    opts->Register("pov-offset", &pov_offset,
-                   "This can be used to add an offset to the POV feature. "
-                   "Intended for use in online decoding as a substitute for "
-                   " CMN.");
-    opts->Register("delta-pitch-scale", &delta_pitch_scale,
-                   "Term to scale the final delta log-pitch feature");
-    opts->Register("delta-pitch-noise-stddev", &delta_pitch_noise_stddev,
-                   "Standard deviation for noise we add to the delta log-pitch "
-                   "(before scaling); should be about the same as delta-pitch "
-                   "option to pitch creation.  The purpose is to get rid of "
-                   "peaks in the delta-pitch caused by discretization of pitch "
-                   "values.");
-    opts->Register("normalization-left-context", &normalization_left_context,
-                   "Left-context (in frames) for moving window normalization");
-    opts->Register("normalization-right-context", &normalization_right_context,
-                   "Right-context (in frames) for moving window normalization");
-    opts->Register("delta-window", &delta_window,
-                   "Number of frames on each side of central frame, to use for "
-                   "delta window.");
-    opts->Register("delay", &delay,
-                   "Number of frames by which the pitch information is "
-                   "delayed.");
-    opts->Register("add-pov-feature", &add_pov_feature,
-                   "If true, the warped NCCF is added to output features");
-    opts->Register("add-normalized-log-pitch", &add_normalized_log_pitch,
-                   "If true, the log-pitch with POV-weighted mean subtraction "
-                   "over 1.5 second window is added to output features");
-    opts->Register("add-delta-pitch", &add_delta_pitch,
-                   "If true, time derivative of log-pitch is added to output "
-                   "features");
-    opts->Register("add-raw-log-pitch", &add_raw_log_pitch,
-                   "If true, log(pitch) is added to output features");
-  }
-};
-
-
-// We don't want to expose the pitch-extraction internals here as it's
-// quite complex, so we use a private implementation.
-class OnlinePitchFeatureImpl;
-
-
-// Note: to start on a new waveform, just construct a new version
-// of this object.
-class OnlinePitchFeature: public OnlineBaseFeature {
- public:
-  explicit OnlinePitchFeature(const PitchExtractionOptions &opts);
-
-  virtual int32 Dim() const { return 2; /* (NCCF, pitch) */ }
-
-  virtual int32 NumFramesReady() const;
-
-  virtual BaseFloat FrameShiftInSeconds() const;
-
-  virtual bool IsLastFrame(int32 frame) const;
-
-  /// Outputs the two-dimensional feature consisting of (NCCF, pitch), the
-  /// order given at Dim().  Post-process it using class OnlineProcessPitch.
-  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
-
-  virtual void AcceptWaveform(BaseFloat sampling_rate,
-                              const VectorBase<BaseFloat> &waveform);
-
-  virtual void InputFinished();
-
-  virtual ~OnlinePitchFeature();
-
- private:
-  OnlinePitchFeatureImpl *impl_;  // private implementation; presumably freed by the destructor, confirm in the .cc
-};
-
-
-/// This online-feature class implements post processing of pitch features.
-/// Inputs are original 2 dims (nccf, pitch).  It can produce various
-/// kinds of outputs, using the default options it will be (pov-feature,
-/// normalized-log-pitch, delta-log-pitch).
-class OnlineProcessPitch: public OnlineFeatureInterface {
- public:
-  virtual int32 Dim() const { return dim_; }
-
-  virtual bool IsLastFrame(int32 frame) const {
-    if (frame <= -1)  // negative index: ask the source about frame -1
-      return src_->IsLastFrame(-1);
-    else if (frame < opts_.delay)  // index below opts_.delay: never query past source frame 0
-      return src_->IsLastFrame(-1) == true ? false : src_->IsLastFrame(0);
-    else  // otherwise shift the index back by opts_.delay
-      return src_->IsLastFrame(frame - opts_.delay);
-  }
-  virtual BaseFloat FrameShiftInSeconds() const {  // forwarded to src_ as-is
-    return src_->FrameShiftInSeconds();
-  }
-
-  virtual int32 NumFramesReady() const;
-
-  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
-
-  virtual ~OnlineProcessPitch() {  }
-
-  // Does not take ownership of "src".
-  OnlineProcessPitch(const ProcessPitchOptions &opts,
-                     OnlineFeatureInterface *src);
-
- private:
-  enum { kRawFeatureDim = 2};  // anonymous enum to define a constant.
-                               // kRawFeatureDim defines the dimension
-                               // of the input: (nccf, pitch)
-
-  ProcessPitchOptions opts_;
-  OnlineFeatureInterface *src_;  // not owned (see the constructor comment)
-  int32 dim_;  // Output feature dimension, set in initializer.
-
-  struct NormalizationStats {
-    int32 cur_num_frames;      // value of src_->NumFramesReady() when
-                               // "mean_pitch" was set.
-    bool input_finished;       // true if input data was finished when
-                               // "mean_pitch" was computed.
-    double sum_pov;            // sum of pov over relevant range
-    double sum_log_pitch_pov;  // sum of log(pitch) * pov over relevant range
-
-    NormalizationStats(): cur_num_frames(-1), input_finished(false),
-                          sum_pov(0.0), sum_log_pitch_pov(0.0) { }
-  };
-
-  std::vector<BaseFloat> delta_feature_noise_;  // presumably noise for the delta feature; confirm in the .cc
-
-  std::vector<NormalizationStats> normalization_stats_;  // refreshed by UpdateNormalizationStats()
-
-  /// Computes and returns the POV feature for this frame.
-  /// Called from GetFrame().
-  inline BaseFloat GetPovFeature(int32 frame) const;
-
-  /// Computes and returns the delta-log-pitch feature for this frame.
-  /// Called from GetFrame().
-  inline BaseFloat GetDeltaPitchFeature(int32 frame);
-
-  /// Computes and returns the raw log-pitch feature for this frame.
-  /// Called from GetFrame().
-  inline BaseFloat GetRawLogPitchFeature(int32 frame) const;
-
-  /// Computes and returns the mean-subtracted log-pitch feature for this frame.
-  /// Called from GetFrame().
-  inline BaseFloat GetNormalizedLogPitchFeature(int32 frame);
-
-  /// Computes the normalization window sizes.
-  inline void GetNormalizationWindow(int32 frame,
-                                     int32 src_frames_ready,
-                                     int32 *window_begin,
-                                     int32 *window_end) const;
-
-  /// Makes sure the entry in normalization_stats_ for this frame is up to date;
-  /// called from GetNormalizedLogPitchFeature.
-  inline void UpdateNormalizationStats(int32 frame);
-};
-
-
-/// This function extracts (NCCF, pitch) per frame, using the pitch extraction
-/// method described in "A Pitch Extraction Algorithm Tuned for Automatic Speech
-/// Recognition", Pegah Ghahremani, Bagher BabaAli, Daniel Povey, Korbinian
-/// Riedhammer, Jan Trmal and Sanjeev Khudanpur, ICASSP 2014.  The output will
-/// have as many rows as there are frames, and two columns corresponding to
-/// (NCCF, pitch)
-void ComputeKaldiPitch(const PitchExtractionOptions &opts,
-                       const VectorBase<BaseFloat> &wave,
-                       Matrix<BaseFloat> *output);
-
-/// This function processes the raw (NCCF, pitch) quantities computed by
-/// ComputeKaldiPitch, and processes them into features.  By default it will
-/// output three-dimensional features, (POV-feature, mean-subtracted-log-pitch,
-/// delta-of-raw-pitch), but this is configurable in the options.  The number of
-/// rows of "output" will be the number of frames (rows) in "input", and the
-/// number of columns will be the number of different types of features
-/// requested (by default, 3; 4 is the max).  The four config variables
-/// --add-pov-feature, --add-normalized-log-pitch, --add-delta-pitch,
-/// --add-raw-log-pitch determine which features we create; by default we create
-/// the first three (add_raw_log_pitch defaults to false in ProcessPitchOptions).
-void ProcessPitch(const ProcessPitchOptions &opts,
-                  const MatrixBase<BaseFloat> &input,
-                  Matrix<BaseFloat> *output);
-
-/// This function combines ComputeKaldiPitch and ProcessPitch.  The reason
-/// why we need a separate function to do this is in order to be able to
-/// accurately simulate the online pitch-processing, for testing and for
-/// training models matched to the "first-pass" features.  It is sensitive to
-/// the variables in pitch_opts that relate to online processing,
-/// i.e. max_frames_latency, frames_per_chunk, simulate_first_pass_online,
-/// recompute_frame (all four are fields of PitchExtractionOptions).
-void ComputeAndProcessKaldiPitch(const PitchExtractionOptions &pitch_opts,
-                                 const ProcessPitchOptions &process_opts,
-                                 const VectorBase<BaseFloat> &wave,
-                                 Matrix<BaseFloat> *output);
-
-
-/// @} End of "addtogroup feat"
-}  // namespace kaldi
-#endif  // KALDI_FEAT_PITCH_FUNCTIONS_H_
--- a/speechx/speechx/kaldi/feat/resample.cc
+++ b/speechx/speechx/kaldi/feat/resample.cc
@ -1,377 +0,0 @@
-// feat/resample.cc
-
-// Copyright    2013  Pegah Ghahremani
-//              2014  IMSL, PKU-HKUST (author: Wei Shi)
-//              2014  Yanqing Sun, Junjie Wang
-//              2014  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include <algorithm>
-#include <limits>
-#include "feat/feature-functions.h"
-#include "matrix/matrix-functions.h"
-#include "feat/resample.h"
-
-namespace kaldi {
-
-
-LinearResample::LinearResample(int32 samp_rate_in_hz,
-                               int32 samp_rate_out_hz,
-                               BaseFloat filter_cutoff_hz,
-                               int32 num_zeros):
-    samp_rate_in_(samp_rate_in_hz),
-    samp_rate_out_(samp_rate_out_hz),
-    filter_cutoff_(filter_cutoff_hz),
-    num_zeros_(num_zeros) {
-  KALDI_ASSERT(samp_rate_in_hz > 0.0 &&
-               samp_rate_out_hz > 0.0 &&
-               filter_cutoff_hz > 0.0 &&
-               filter_cutoff_hz*2 <= samp_rate_in_hz &&
-               filter_cutoff_hz*2 <= samp_rate_out_hz &&
-               num_zeros > 0);
-
-  // base_freq is the frequency of the repeating unit, which is the gcd
-  // of the input frequencies.
-  int32 base_freq = Gcd(samp_rate_in_, samp_rate_out_);
-  input_samples_in_unit_ = samp_rate_in_ / base_freq;
-  output_samples_in_unit_ = samp_rate_out_ / base_freq;
-
-  SetIndexesAndWeights();
-  Reset();
-}
-
-int64 LinearResample::GetNumOutputSamples(int64 input_num_samp,
-                                          bool flush) const {
-  // For exact computation, we measure time in "ticks" of 1.0 / tick_freq,
-  // where tick_freq is the least common multiple of samp_rate_in_ and
-  // samp_rate_out_.
-  int32 tick_freq = Lcm(samp_rate_in_, samp_rate_out_);
-  int32 ticks_per_input_period = tick_freq / samp_rate_in_;
-
-  // work out the number of ticks in the time interval
-  // [ 0, input_num_samp/samp_rate_in_ ).
-  int64 interval_length_in_ticks = input_num_samp * ticks_per_input_period;
-  if (!flush) {
-    BaseFloat window_width = num_zeros_ / (2.0 * filter_cutoff_);
-    // To count the window-width in ticks we take the floor.  This
-    // is because since we're looking for the largest integer num-out-samp
-    // that fits in the interval, which is open on the right, a reduction
-    // in interval length of less than a tick will never make a difference.
-    // For example, the largest integer in the interval [ 0, 2 ) and the
-    // largest integer in the interval [ 0, 2 - 0.9 ) are the same (both one).
-    // So when we're subtracting the window-width we can ignore the fractional
-    // part.
-    int32 window_width_ticks = floor(window_width * tick_freq);
-    // The time-period of the output that we can sample gets reduced
-    // by the window-width (which is actually the distance from the
-    // center to the edge of the windowing function) if we're not
-    // "flushing the output".
-    interval_length_in_ticks -= window_width_ticks;
-  }
-  if (interval_length_in_ticks <= 0)
-    return 0;
-  int32 ticks_per_output_period = tick_freq / samp_rate_out_;
-  // Get the last output-sample in the closed interval, i.e. replacing [ ) with
-  // [ ].  Note: integer division rounds down.  See
-  // http://en.wikipedia.org/wiki/Interval_(mathematics) for an explanation of
-  // the notation.
-  int64 last_output_samp = interval_length_in_ticks / ticks_per_output_period;
-  // We need the last output-sample in the open interval, so if it takes us to
-  // the end of the interval exactly, subtract one.
-  if (last_output_samp * ticks_per_output_period == interval_length_in_ticks)
-    last_output_samp--;
-  // First output-sample index is zero, so the number of output samples
-  // is the last output-sample plus one.
-  int64 num_output_samp = last_output_samp + 1;
-  return num_output_samp;
-}
-
-void LinearResample::SetIndexesAndWeights() {
-  first_index_.resize(output_samples_in_unit_);
-  weights_.resize(output_samples_in_unit_);
-
-  double window_width = num_zeros_ / (2.0 * filter_cutoff_);
-
-  for (int32 i = 0; i < output_samples_in_unit_; i++) {
-    double output_t = i / static_cast<double>(samp_rate_out_);
-    double min_t = output_t - window_width, max_t = output_t + window_width;
-    // we do ceil on the min and floor on the max, because if we did it
-    // the other way around we would unnecessarily include indexes just
-    // outside the window, with zero coefficients.  It's possible
-    // if the arguments to the ceil and floor expressions are integers
-    // (e.g. if filter_cutoff_ has an exact ratio with the sample rates),
-    // that we unnecessarily include something with a zero coefficient,
-    // but this is only a slight efficiency issue.
-    int32 min_input_index = ceil(min_t * samp_rate_in_),
-        max_input_index = floor(max_t * samp_rate_in_),
-        num_indices = max_input_index - min_input_index + 1;
-    first_index_[i] = min_input_index;
-    weights_[i].Resize(num_indices);
-    for (int32 j = 0; j < num_indices; j++) {
-      int32 input_index = min_input_index + j;
-      double input_t = input_index / static_cast<double>(samp_rate_in_),
-          delta_t = input_t - output_t;
-      // sign of delta_t doesn't matter.
-      weights_[i](j) = FilterFunc(delta_t) / samp_rate_in_;
-    }
-  }
-}
-
-
-// inline
-void LinearResample::GetIndexes(int64 samp_out,
-                                int64 *first_samp_in,
-                                int32 *samp_out_wrapped) const {
-  // A unit is the smallest nonzero amount of time that is an exact
-  // multiple of the input and output sample periods.  The unit index
-  // is the answer to "which numbered unit we are in".
-  int64 unit_index = samp_out / output_samples_in_unit_;
-  // samp_out_wrapped is equal to samp_out % output_samples_in_unit_
-  *samp_out_wrapped = static_cast<int32>(samp_out -
-                                         unit_index * output_samples_in_unit_);
-  *first_samp_in = first_index_[*samp_out_wrapped] +
-      unit_index * input_samples_in_unit_;
-}
-
-
-void LinearResample::Resample(const VectorBase<BaseFloat> &input,
-                              bool flush,
-                              Vector<BaseFloat> *output) {
-  int32 input_dim = input.Dim();
-  int64 tot_input_samp = input_sample_offset_ + input_dim,
-      tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);
-
-  KALDI_ASSERT(tot_output_samp >= output_sample_offset_);
-
-  output->Resize(tot_output_samp - output_sample_offset_);
-
-  // samp_out is the index into the total output signal, not just the part
-  // of it we are producing here.
-  for (int64 samp_out = output_sample_offset_;
-       samp_out < tot_output_samp;
-       samp_out++) {
-    int64 first_samp_in;
-    int32 samp_out_wrapped;
-    GetIndexes(samp_out, &first_samp_in, &samp_out_wrapped);
-    const Vector<BaseFloat> &weights = weights_[samp_out_wrapped];
-    // first_input_index is the first index into "input" that we have a weight
-    // for.
-    int32 first_input_index = static_cast<int32>(first_samp_in -
-                                                 input_sample_offset_);
-    BaseFloat this_output;
-    if (first_input_index >= 0 &&
-        first_input_index + weights.Dim() <= input_dim) {
-      SubVector<BaseFloat> input_part(input, first_input_index, weights.Dim());
-      this_output = VecVec(input_part, weights);
-    } else {  // Handle edge cases.
-      this_output = 0.0;
-      for (int32 i = 0; i < weights.Dim(); i++) {
-        BaseFloat weight = weights(i);
-        int32 input_index = first_input_index + i;
-        if (input_index < 0 && input_remainder_.Dim() + input_index >= 0) {
-          this_output += weight *
-              input_remainder_(input_remainder_.Dim() + input_index);
-        } else if (input_index >= 0 && input_index < input_dim) {
-          this_output += weight * input(input_index);
-        } else if (input_index >= input_dim) {
-          // We're past the end of the input and are adding zero; should only
-          // happen if the user specified flush == true, or else we would not
-          // be trying to output this sample.
-          KALDI_ASSERT(flush);
-        }
-      }
-    }
-    int32 output_index = static_cast<int32>(samp_out - output_sample_offset_);
-    (*output)(output_index) = this_output;
-  }
-
-  if (flush) {
-    Reset();  // Reset the internal state.
-  } else {
-    SetRemainder(input);
-    input_sample_offset_ = tot_input_samp;
-    output_sample_offset_ = tot_output_samp;
-  }
-}
-
-void LinearResample::SetRemainder(const VectorBase<BaseFloat> &input) {
-  Vector<BaseFloat> old_remainder(input_remainder_);
-  // max_remainder_needed is the width of the filter from side to side,
-  // measured in input samples.  you might think it should be half that,
-  // but you have to consider that you might be wanting to output samples
-  // that are "in the past" relative to the beginning of the latest
-  // input... anyway, storing more remainder than needed is not harmful.
-  int32 max_remainder_needed = ceil(samp_rate_in_ * num_zeros_ /
-                                    filter_cutoff_);
-  input_remainder_.Resize(max_remainder_needed);
-  for (int32 index = - input_remainder_.Dim(); index < 0; index++) {
-    // we interpret "index" as an offset from the end of "input" and
-    // from the end of input_remainder_.
-    int32 input_index = index + input.Dim();
-    if (input_index >= 0)
-      input_remainder_(index + input_remainder_.Dim()) = input(input_index);
-    else if (input_index + old_remainder.Dim() >= 0)
-      input_remainder_(index + input_remainder_.Dim()) =
-          old_remainder(input_index + old_remainder.Dim());
-    // else leave it at zero.
-  }
-}
-
-void LinearResample::Reset() {
-  input_sample_offset_ = 0;
-  output_sample_offset_ = 0;
-  input_remainder_.Resize(0);
-}
-
-/** Here, t is a time in seconds representing an offset from
-    the center of the windowed filter function, and FilterFunction(t)
-    returns the windowed filter function, described
-    in the header as h(t) = f(t)g(t), evaluated at t.
-*/
-BaseFloat LinearResample::FilterFunc(BaseFloat t) const {
-  BaseFloat window,  // raised-cosine (Hanning) window of width
-                  // num_zeros_/2*filter_cutoff_
-      filter;  // sinc filter function
-  if (fabs(t) < num_zeros_ / (2.0 * filter_cutoff_))
-    window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t));
-  else
-    window = 0.0;  // outside support of window function
-  if (t != 0)
-    filter = sin(M_2PI * filter_cutoff_ * t) / (M_PI * t);
-  else
-    filter = 2 * filter_cutoff_;  // limit of the function at t = 0
-  return filter * window;
-}
-
-
-ArbitraryResample::ArbitraryResample(
-    int32 num_samples_in, BaseFloat samp_rate_in,
-    BaseFloat filter_cutoff, const Vector<BaseFloat> &sample_points,
-    int32 num_zeros):
-    num_samples_in_(num_samples_in),
-    samp_rate_in_(samp_rate_in),
-    filter_cutoff_(filter_cutoff),
-    num_zeros_(num_zeros) {
-  KALDI_ASSERT(num_samples_in > 0 && samp_rate_in > 0.0 &&
-               filter_cutoff > 0.0 &&
-               filter_cutoff * 2.0 <= samp_rate_in
-               && num_zeros > 0);
-  // set up weights_ and indices_.  Please try to keep all functions short and
-  SetIndexes(sample_points);
-  SetWeights(sample_points);
-}
-
-
-void ArbitraryResample::Resample(const MatrixBase<BaseFloat> &input,
-                                 MatrixBase<BaseFloat> *output) const {
-  // each row of "input" corresponds to the data to resample;
-  // the corresponding row of "output" is the resampled data.
-
-  KALDI_ASSERT(input.NumRows() == output->NumRows() &&
-               input.NumCols() == num_samples_in_ &&
-               output->NumCols() == weights_.size());
-
-  Vector<BaseFloat> output_col(output->NumRows());
-  for (int32 i = 0; i < NumSamplesOut(); i++) {
-    SubMatrix<BaseFloat> input_part(input, 0, input.NumRows(),
-                                    first_index_[i],
-                                    weights_[i].Dim());
-    const Vector<BaseFloat> &weight_vec(weights_[i]);
-    output_col.AddMatVec(1.0, input_part,
-                         kNoTrans, weight_vec, 0.0);
-    output->CopyColFromVec(output_col, i);
-  }
-}
-
-void ArbitraryResample::Resample(const VectorBase<BaseFloat> &input,
-                                 VectorBase<BaseFloat> *output) const {
-  KALDI_ASSERT(input.Dim() == num_samples_in_ &&
-               output->Dim() == weights_.size());
-
-  int32 output_dim = output->Dim();
-  for (int32 i = 0; i < output_dim; i++) {
-    SubVector<BaseFloat> input_part(input, first_index_[i], weights_[i].Dim());
-    (*output)(i) = VecVec(input_part, weights_[i]);
-  }
-}
-
-void ArbitraryResample::SetIndexes(const Vector<BaseFloat> &sample_points) {
-  int32 num_samples = sample_points.Dim();
-  first_index_.resize(num_samples);
-  weights_.resize(num_samples);
-  BaseFloat filter_width = num_zeros_ / (2.0 * filter_cutoff_);
-  for (int32  i = 0; i < num_samples; i++) {
-    // the t values are in seconds.
-    BaseFloat t = sample_points(i),
-        t_min = t - filter_width, t_max = t + filter_width;
-    int32 index_min = ceil(samp_rate_in_ * t_min),
-        index_max = floor(samp_rate_in_ * t_max);
-    // the ceil on index min and the floor on index_max are because there
-    // is no point using indices just outside the window (coeffs would be zero).
-    if (index_min < 0)
-      index_min = 0;
-    if (index_max >= num_samples_in_)
-      index_max = num_samples_in_ - 1;
-    first_index_[i] = index_min;
-    weights_[i].Resize(index_max - index_min + 1);
-  }
-}
-
-void ArbitraryResample::SetWeights(const Vector<BaseFloat> &sample_points) {
-  int32 num_samples_out = NumSamplesOut();
-  for (int32 i = 0; i < num_samples_out; i++) {
-    for (int32 j = 0 ; j < weights_[i].Dim(); j++) {
-      BaseFloat delta_t = sample_points(i) -
-          (first_index_[i] + j) / samp_rate_in_;
-      // Include at this point the factor of 1.0 / samp_rate_in_ which
-      // appears in the math.
-      weights_[i](j) = FilterFunc(delta_t) / samp_rate_in_;
-    }
-  }
-}
-
-/** Here, t is a time in seconds representing an offset from
-    the center of the windowed filter function, and FilterFunction(t)
-    returns the windowed filter function, described
-    in the header as h(t) = f(t)g(t), evaluated at t.
-*/
-BaseFloat ArbitraryResample::FilterFunc(BaseFloat t) const {
-  BaseFloat window,  // raised-cosine (Hanning) window of width
-                  // num_zeros_/2*filter_cutoff_
-      filter;  // sinc filter function
-  if (fabs(t) < num_zeros_ / (2.0 * filter_cutoff_))
-    window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t));
-  else
-    window = 0.0;  // outside support of window function
-  if (t != 0.0)
-    filter = sin(M_2PI * filter_cutoff_ * t) / (M_PI * t);
-  else
-    filter = 2.0 * filter_cutoff_;  // limit of the function at zero.
-  return filter * window;
-}
-
-void ResampleWaveform(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
-                      BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
-  BaseFloat min_freq = std::min(orig_freq, new_freq);
-  BaseFloat lowpass_cutoff = 0.99 * 0.5 * min_freq;
-  int32 lowpass_filter_width = 6;
-  LinearResample resampler(orig_freq, new_freq,
-                           lowpass_cutoff, lowpass_filter_width);
-  resampler.Resample(wave, true, new_wave);
-}
-}  // namespace kaldi
--- a/speechx/speechx/kaldi/feat/resample.h
+++ b/speechx/speechx/kaldi/feat/resample.h
@ -1,287 +0,0 @@
-// feat/resample.h
-
-// Copyright     2013  Pegah Ghahremani
-//               2014  IMSL, PKU-HKUST (author: Wei Shi)
-//               2014  Yanqing Sun, Junjie Wang
-//               2014  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_FEAT_RESAMPLE_H_
-#define KALDI_FEAT_RESAMPLE_H_
-
-#include <cassert>
-#include <cstdlib>
-#include <string>
-#include <vector>
-
-
-#include "matrix/matrix-lib.h"
-#include "util/common-utils.h"
-#include "base/kaldi-error.h"
-
-namespace kaldi {
-/// @addtogroup  feat FeatureExtraction
-/// @{
-
-/**
-   \file[resample.h]
-
-   This header contains declarations of classes for resampling signals.  The
-   normal cases of resampling a signal are upsampling and downsampling
-   (increasing and decreasing the sample rate of a signal, respectively),
-   although the ArbitraryResample class allows a more generic case where
-   we want to get samples of a signal at uneven intervals (for instance,
-   log-spaced).
-
-   The input signal is always evenly spaced, say sampled with frequency S, and
-   we assume the original signal was band-limited to S/2 or lower.  The n'th
-   input sample x_n (with n = 0, 1, ...) is interpreted as the original
-   signal's value at time n/S.
-
-   For resampling, it is convenient to view the input signal as a
-   continuous function x(t) of t, where each sample x_n becomes a delta function
-   with magnitude x_n/S, at time n/S.  If we band limit this to the Nyquist
-   frequency S/2, we can show that this is the same as the original signal
-   that was sampled. [assuming the original signal was periodic and band
-   limited.]  In general we want to bandlimit to lower than S/2, because
-   we don't have a perfect filter and also because if we want to resample
-   at a lower frequency than S, we need to bandlimit to below half of that.
-   Anyway, suppose we want to bandlimit to C, with 0 < C < S/2.  The perfect
-   rectangular filter with cutoff C is the sinc function,
-   \f[         f(t) = 2C sinc(2Ct),                   \f]
-   where sinc is the normalized sinc function \f$ sinc(t) = sin(pi t) / (pi t) \f$, with
-  \f$  sinc(0) = 1 \f$.  This is not a practical filter, though, because it has
-   infinite support.  At the cost of less-than-perfect rolloff, we can choose
-   a suitable windowing function g(t), and use f(t) g(t) as the filter.  For
-   a windowing function we choose raised-cosine (Hanning) window with support
-   on [-w/2C, w/2C], where w >= 2 is an integer chosen by the user.  w = 1
-   means we window the sinc function out to its first zero on the left and right,
-   w = 2 means the second zero, and so on; we normally choose w to be at least two.
-   We call this num_zeros, not w, in the code.
-
-   Convolving the signal x(t) with this windowed filter h(t) = f(t)g(t) and evaluating the resulting
-   signal s(t) at an arbitrary time t is easy: we have
-    \f[          s(t) = 1/S \sum_n x_n h(t - n/S)        \f].
-   (note: the sign of t - n/S might be wrong, but it doesn't matter as the filter
-   and window are symmetric).
-   This is true for arbitrary values of t.  What the class ArbitraryResample does
-   is to allow you to evaluate the signal for specified values of t.
-*/
-
-
-/**
-   Class ArbitraryResample allows you to resample a signal (assumed zero outside
-   the sample region, not periodic) at arbitrary specified time values, which
-   don't have to be linearly spaced.  The low-pass filter cutoff
-   "filter_cutoff_hz" should be less than half the sample rate;
-   "num_zeros" should probably be at least two preferably more; higher numbers give
-   sharper filters but will be less efficient.
-*/
-class ArbitraryResample {
- public:
-  ArbitraryResample(int32 num_samples_in,
-                    BaseFloat samp_rate_hz,
-                    BaseFloat filter_cutoff_hz,
-                    const Vector<BaseFloat> &sample_points_secs,
-                    int32 num_zeros);
-
-  int32 NumSamplesIn() const { return num_samples_in_; }
-
-  int32 NumSamplesOut() const { return weights_.size(); }
-
-  /// This function does the resampling.
-  /// input.NumRows() and output.NumRows() should be equal
-  /// and nonzero.
-  /// input.NumCols() should equal NumSamplesIn()
-  /// and output.NumCols() should equal NumSamplesOut().
-  void Resample(const MatrixBase<BaseFloat> &input,
-                MatrixBase<BaseFloat> *output) const;
-
-  /// This version of the Resample function processes just
-  /// one vector.
-  void Resample(const VectorBase<BaseFloat> &input,
-                VectorBase<BaseFloat> *output) const;
- private:
-  void SetIndexes(const Vector<BaseFloat> &sample_points);
-
-  void SetWeights(const Vector<BaseFloat> &sample_points);
-
-  BaseFloat FilterFunc(BaseFloat t) const;
-
-  int32 num_samples_in_;
-  BaseFloat samp_rate_in_;
-  BaseFloat filter_cutoff_;
-  int32 num_zeros_;
-
-  std::vector<int32> first_index_;  // The first input-sample index that we sum
-                                    // over, for this output-sample index.
-  std::vector<Vector<BaseFloat> > weights_;
-};
-
-
-/**
-   LinearResample is a special case of ArbitraryResample, where we want to
-   resample a signal at linearly spaced intervals (this means we want to
-   upsample or downsample the signal).  It is more efficient than
-   ArbitraryResample because we can construct it just once.
-
-   We require that the input and output sampling rate be specified as
-   integers, as this is an easy way to specify that their ratio be rational.
-*/
-
-class LinearResample {
- public:
-  /// Constructor.  We make the input and output sample rates integers, because
-  /// we are going to need to find a common divisor.  This should just remind
-  /// you that they need to be integers.  The filter cutoff needs to be less
-  /// than samp_rate_in_hz/2 and less than samp_rate_out_hz/2.  num_zeros
-  /// controls the sharpness of the filter, more == sharper but less efficient.
-  /// We suggest around 4 to 10 for normal use.
-  LinearResample(int32 samp_rate_in_hz,
-                 int32 samp_rate_out_hz,
-                 BaseFloat filter_cutoff_hz,
-                 int32 num_zeros);
-
-  /// This function does the resampling.  If you call it with flush == true and
-  /// you have never called it with flush == false, it just resamples the input
-  /// signal (it resizes the output to a suitable number of samples).
-  ///
-  /// You can also use this function to process a signal a piece at a time.
-  /// suppose you break it into piece1, piece2, ... pieceN.  You can call
-  /// \code{.cc}
-  /// Resample(piece1, &output1, false);
-  /// Resample(piece2, &output2, false);
-  /// Resample(piece3, &output3, true);
-  /// \endcode
-  /// If you call it with flush == false, it won't output the last few samples
-  /// but will remember them, so that if you later give it a second piece of
-  /// the input signal it can process it correctly.
-  /// If your most recent call to the object was with flush == false, it will
-  /// have internal state; you can remove this by calling Reset().
-  /// Empty input is acceptable.
-  void Resample(const VectorBase<BaseFloat> &input,
-                bool flush,
-                Vector<BaseFloat> *output);
-
-  /// Calling the function Reset() resets the state of the object prior to
-  /// processing a new signal; it is only necessary if you have called
-  /// Resample(x, y, false) for some signal, leading to a remainder of the
-  /// signal being called, but then abandon processing the signal before calling
-  /// Resample(x, y, true) for the last piece.  Call it unnecessarily between
-  /// signals will not do any harm.
-  void Reset();
-
-  //// Return the input and output sampling rates (for checks, for example)
-  inline int32 GetInputSamplingRate() { return samp_rate_in_; }
-  inline int32 GetOutputSamplingRate() { return samp_rate_out_; }
- private:
-  /// This function outputs the number of output samples we will output
-  /// for a signal with "input_num_samp" input samples.  If flush == true,
-  /// we return the largest n such that
-  /// (n/samp_rate_out_) is in the interval [ 0, input_num_samp/samp_rate_in_ ),
-  /// and note that the interval is half-open.  If flush == false,
-  /// define window_width as num_zeros / (2.0 * filter_cutoff_);
-  /// we return the largest n such that (n/samp_rate_out_) is in the interval
-  /// [ 0, input_num_samp/samp_rate_in_ - window_width ).
-  int64 GetNumOutputSamples(int64 input_num_samp, bool flush) const;
-
-
-  /// Given an output-sample index, this function outputs to *first_samp_in the
-  /// first input-sample index that we have a weight on (may be negative),
-  /// and to *samp_out_wrapped the index into weights_ where we can get the
-  /// corresponding weights on the input.
-  inline void GetIndexes(int64 samp_out,
-                         int64 *first_samp_in,
-                         int32 *samp_out_wrapped) const;
-
-  void SetRemainder(const VectorBase<BaseFloat> &input);
-
-  void SetIndexesAndWeights();
-
-  BaseFloat FilterFunc(BaseFloat) const;
-
-  // The following variables are provided by the user.
-  int32 samp_rate_in_;
-  int32 samp_rate_out_;
-  BaseFloat filter_cutoff_;
-  int32 num_zeros_;
-
-  int32 input_samples_in_unit_;   ///< The number of input samples in the
-                                  ///< smallest repeating unit: num_samp_in_ =
-                                  ///< samp_rate_in_hz / Gcd(samp_rate_in_hz,
-                                  ///< samp_rate_out_hz)
-  int32 output_samples_in_unit_;  ///< The number of output samples in the
-                                  ///< smallest repeating unit: num_samp_out_ =
-                                  ///< samp_rate_out_hz / Gcd(samp_rate_in_hz,
-                                  ///< samp_rate_out_hz)
-
-
-  /// The first input-sample index that we sum over, for this output-sample
-  /// index.  May be negative; any truncation at the beginning is handled
-  /// separately.  This is just for the first few output samples, but we can
-  /// extrapolate the correct input-sample index for arbitrary output samples.
-  std::vector<int32> first_index_;
-
-  /// Weights on the input samples, for this output-sample index.
-  std::vector<Vector<BaseFloat> > weights_;
-
-  // the following variables keep track of where we are in a particular signal,
-  // if it is being provided over multiple calls to Resample().
-
-  int64 input_sample_offset_;  ///< The number of input samples we have
-                               ///< already received for this signal
-                               ///< (including anything in remainder_)
-  int64 output_sample_offset_;  ///< The number of samples we have already
-                                ///< output for this signal.
-  Vector<BaseFloat> input_remainder_;  ///< A small trailing part of the
-                                       ///< previously seen input signal.
-};
-
-/**
-   Downsample or upsample a waveform. This is a convenience wrapper for the
-   class 'LinearResample'.
-   The low-pass filter cutoff used in 'LinearResample' is 0.99 of the Nyquist,
-   where the Nyquist is half of the minimum of (orig_freq, new_freq).  The
-   resampling is done with a symmetric FIR filter with N_z (number of zeros)
-   as 6.
-
-   We compared the downsampling results with those from the sox resampling
-   toolkit.
-   Sox's design is inspired by Laurent De Soras' paper,
-   https://ccrma.stanford.edu/~jos/resample/Implementation.html
-
-   Note: we expect that while orig_freq and new_freq are of type BaseFloat, they
-   are actually required to have exact integer values (like 16000 or 8000) with
-   a ratio between them that can be expressed as a rational number with
-   reasonably small integer factors.
-*/
-void ResampleWaveform(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
-                      BaseFloat new_freq, Vector<BaseFloat> *new_wave);
-
-
-/// This function is deprecated.  It is provided for backward compatibility, to avoid
-/// breaking older code.
-inline void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
-                               BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
-  ResampleWaveform(orig_freq, wave, new_freq, new_wave);
-}
-
-
-/// @} End of "addtogroup feat"
-}  // namespace kaldi
-#endif  // KALDI_FEAT_RESAMPLE_H_
--- a/speechx/speechx/kaldi/feat/signal.cc
+++ b/speechx/speechx/kaldi/feat/signal.cc
@ -1,129 +0,0 @@
-// feat/signal.cc
-
-// Copyright 2015  Tom Ko
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#include "base/kaldi-common.h"
-#include "util/common-utils.h"
-#include "feat/signal.h"
-
-namespace kaldi {
-
-void ElementwiseProductOfFft(const Vector<BaseFloat> &a, Vector<BaseFloat> *b) {
-  int32 num_fft_bins = a.Dim() / 2;
-  for (int32 i = 0; i < num_fft_bins; i++) {
-    // do complex multiplication
-    ComplexMul(a(2*i), a(2*i + 1), &((*b)(2*i)), &((*b)(2*i + 1)));
-  }
-}
-
-void ConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal) {
-  int32 signal_length = signal->Dim();
-  int32 filter_length = filter.Dim();
-  int32 output_length = signal_length + filter_length - 1;
-  Vector<BaseFloat> signal_padded(output_length);
-  signal_padded.SetZero();
-  for (int32 i = 0; i < signal_length; i++) {
-    for (int32 j = 0; j < filter_length; j++) {
-        signal_padded(i + j) += (*signal)(i) * filter(j);
-    }
-  }
-  signal->Resize(output_length);
-  signal->CopyFromVec(signal_padded);
-}
-
-
-void FFTbasedConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal) {
-  int32 signal_length = signal->Dim();
-  int32 filter_length = filter.Dim();
-  int32 output_length = signal_length + filter_length - 1;
-
-  int32 fft_length = RoundUpToNearestPowerOfTwo(output_length);
-  KALDI_VLOG(1) << "fft_length for full signal convolution is " << fft_length;
-
-  SplitRadixRealFft<BaseFloat> srfft(fft_length);
-
-  Vector<BaseFloat> filter_padded(fft_length);
-  filter_padded.Range(0, filter_length).CopyFromVec(filter);
-  srfft.Compute(filter_padded.Data(), true);
-
-  Vector<BaseFloat> signal_padded(fft_length);
-  signal_padded.Range(0, signal_length).CopyFromVec(*signal);
-  srfft.Compute(signal_padded.Data(), true);
-
-  ElementwiseProductOfFft(filter_padded, &signal_padded);
-
-  srfft.Compute(signal_padded.Data(), false);
-  signal_padded.Scale(1.0 / fft_length);
-
-  signal->Resize(output_length);
-  signal->CopyFromVec(signal_padded.Range(0, output_length));
-}
-
-void FFTbasedBlockConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal) {
-  int32 signal_length = signal->Dim();
-  int32 filter_length = filter.Dim();
-  int32 output_length = signal_length + filter_length - 1;
-  signal->Resize(output_length, kCopyData);
-
-  KALDI_VLOG(1) << "Length of the filter is " << filter_length;
-
-  int32 fft_length = RoundUpToNearestPowerOfTwo(4 * filter_length);
-  KALDI_VLOG(1) << "Best FFT length is " << fft_length;
-
-  int32 block_length = fft_length - filter_length + 1;
-  KALDI_VLOG(1) << "Block size is " << block_length;
-  SplitRadixRealFft<BaseFloat> srfft(fft_length);
-
-  Vector<BaseFloat> filter_padded(fft_length);
-  filter_padded.Range(0, filter_length).CopyFromVec(filter);
-  srfft.Compute(filter_padded.Data(), true);
-
-  Vector<BaseFloat> temp_pad(filter_length - 1);
-  temp_pad.SetZero();
-  Vector<BaseFloat> signal_block_padded(fft_length);
-
-  for (int32 po = 0; po < output_length; po += block_length) {
-    // get a block of the signal
-    int32 process_length = std::min(block_length, output_length - po);
-    signal_block_padded.SetZero();
-    signal_block_padded.Range(0, process_length).CopyFromVec(signal->Range(po, process_length));
-
-    srfft.Compute(signal_block_padded.Data(), true);
-
-    ElementwiseProductOfFft(filter_padded, &signal_block_padded);
-
-    srfft.Compute(signal_block_padded.Data(), false);
-    signal_block_padded.Scale(1.0 / fft_length);
-
-    // combine the block
-    if (po + block_length < output_length) {       // current block is not the last block
-      signal->Range(po, block_length).CopyFromVec(signal_block_padded.Range(0, block_length));
-      signal->Range(po, filter_length - 1).AddVec(1.0, temp_pad);
-      temp_pad.CopyFromVec(signal_block_padded.Range(block_length, filter_length - 1));
-    } else {
-      signal->Range(po, output_length - po).CopyFromVec(
-                        signal_block_padded.Range(0, output_length - po));
-      if (filter_length - 1 < output_length - po)
-        signal->Range(po, filter_length - 1).AddVec(1.0, temp_pad);
-      else
-        signal->Range(po, output_length - po).AddVec(1.0, temp_pad.Range(0, output_length - po));
-    }
-  }
-}
-}
-
--- a/speechx/speechx/kaldi/feat/signal.h
+++ b/speechx/speechx/kaldi/feat/signal.h
@ -1,58 +0,0 @@
-// feat/signal.h
-
-// Copyright 2015  Tom Ko
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_SIGNAL_H_
-#define KALDI_FEAT_SIGNAL_H_
-
-#include "base/kaldi-common.h"
-#include "util/common-utils.h"
-
-namespace kaldi {
-
-/*
-   The following three functions compute the same result but use different
-   implementations, and therefore differ in efficiency. After the convolution,
-   the length of the signal will be extended to (original signal length +
-   filter length - 1).
-*/
-
-/*
-   This function implements a simple non-FFT-based convolution of two signals.
-   Prefer one of the FFT-based convolution functions declared below, which
-   are more efficient.
-*/
-void ConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal);
-
-/*
-   This function implements FFT-based convolution of two signals.
-   It is expected to be less efficient than FFTbasedBlockConvolveSignals(),
-   as it processes the entire signal with a single FFT.
-*/
-void FFTbasedConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal);
-
-/*
-   This function implements FFT-based block convolution of two signals using
-   the overlap-add method. This is an efficient way to evaluate the discrete
-   convolution of a long signal with a finite impulse response filter.
-*/
-void FFTbasedBlockConvolveSignals(const Vector<BaseFloat> &filter, Vector<BaseFloat> *signal);
-
-}  // namespace kaldi
-
-#endif  // KALDI_FEAT_SIGNAL_H_
--- a/speechx/speechx/kaldi/matrix/CMakeLists.txt
+++ b/speechx/speechx/kaldi/matrix/CMakeLists.txt
@ -1,16 +0,0 @@
-# kaldi-matrix: library built from the Kaldi matrix/vector sources listed below.
-add_library(kaldi-matrix
-  compressed-matrix.cc
-  kaldi-matrix.cc
-  kaldi-vector.cc
-  matrix-functions.cc
-  optimization.cc
-  packed-matrix.cc
-  qr.cc
-  sparse-matrix.cc
-  sp-matrix.cc
-  srfft.cc
-  tp-matrix.cc
-)
-# PUBLIC spells out the transitive linkage that the keyword-less signature implied.
-target_link_libraries(kaldi-matrix PUBLIC gfortran kaldi-base libopenblas.a)
--- a/speechx/speechx/kaldi/matrix/cblas-wrappers.h
+++ b/speechx/speechx/kaldi/matrix/cblas-wrappers.h
@ -1,491 +0,0 @@
-// matrix/cblas-wrappers.h
-
-// Copyright 2012  Johns Hopkins University (author: Daniel Povey);
-//                 Haihua Xu; Wei Shi
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-//  http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-#ifndef KALDI_MATRIX_CBLAS_WRAPPERS_H_
-#define KALDI_MATRIX_CBLAS_WRAPPERS_H_ 1
-
-
-#include <limits>
-#include "matrix/sp-matrix.h"
-#include "matrix/kaldi-vector.h"
-#include "matrix/kaldi-matrix.h"
-#include "matrix/matrix-functions.h"
-#include "matrix/kaldi-blas.h"
-
-// Do not include this file directly.  It is to be included
-// by .cc files in this directory.
-
-namespace kaldi {
-
-
-inline void cblas_Xcopy(const int N, const float *X, const int incX, float *Y,
-                        const int incY) {
-  cblas_scopy(N, X, incX, Y, incY);
-}
-
-inline void cblas_Xcopy(const int N, const double *X, const int incX, double *Y,
-                        const int incY) {
-  cblas_dcopy(N, X, incX, Y, incY);
-}
-
-
-inline float cblas_Xasum(const int N, const float *X, const int incX) {
-  return cblas_sasum(N, X, incX);
-}
-
-inline double cblas_Xasum(const int N, const double *X, const int incX) {
-  return cblas_dasum(N, X, incX);
-}
-
-inline void cblas_Xrot(const int N, float *X, const int incX, float *Y,
-                       const int incY, const float c, const float s) {
-  cblas_srot(N, X, incX, Y, incY, c, s);
-}
-inline void cblas_Xrot(const int N, double *X, const int incX, double *Y,
-                       const int incY, const double c, const double s) {
-  cblas_drot(N, X, incX, Y, incY, c, s);
-}
-inline float cblas_Xdot(const int N, const float *const X,
-                        const int incX, const float *const Y,
-                        const int incY) {
-  return cblas_sdot(N, X, incX, Y, incY);
-}
-inline double cblas_Xdot(const int N, const double *const X,
-                        const int incX, const double *const Y,
-                        const int incY) {
-  return cblas_ddot(N, X, incX, Y, incY);
-}
-inline void cblas_Xaxpy(const int N, const float alpha, const float *X,
-                        const int incX, float *Y, const int incY) {
-  cblas_saxpy(N, alpha, X, incX, Y, incY);
-}
-inline void cblas_Xaxpy(const int N, const double alpha, const double *X,
-                        const int incX, double *Y, const int incY) {
-  cblas_daxpy(N, alpha, X, incX, Y, incY);
-}
-inline void cblas_Xscal(const int N, const float alpha, float *data,
-                        const int inc) {
-  cblas_sscal(N, alpha, data, inc);
-}
-inline void cblas_Xscal(const int N, const double alpha, double *data, 
-                        const int inc) {
-  cblas_dscal(N, alpha, data, inc);
-}
-inline void cblas_Xspmv(const float alpha, const int num_rows, const float *Mdata,
-                        const float *v, const int v_inc,
-                        const float beta, float *y, const int y_inc) {
-  cblas_sspmv(CblasRowMajor, CblasLower, num_rows, alpha, Mdata, v, v_inc, beta, y, y_inc);
-}
-inline void cblas_Xspmv(const double alpha, const int num_rows, const double *Mdata,
-                        const double *v, const int v_inc,
-                        const double beta, double *y, const int y_inc) {
-  cblas_dspmv(CblasRowMajor, CblasLower, num_rows, alpha, Mdata, v, v_inc, beta, y, y_inc);
-}
-inline void cblas_Xtpmv(MatrixTransposeType trans, const float *Mdata,
-                        const int num_rows, float *y, const int y_inc) {
-  cblas_stpmv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
-              CblasNonUnit, num_rows, Mdata, y, y_inc);
-}
-inline void cblas_Xtpmv(MatrixTransposeType trans, const double *Mdata,
-                        const int num_rows, double *y, const int y_inc) {
-  cblas_dtpmv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
-              CblasNonUnit, num_rows, Mdata, y, y_inc);
-}
-
-
-inline void cblas_Xtpsv(MatrixTransposeType trans, const float *Mdata,
-                        const int num_rows, float *y, const int y_inc) {
-  cblas_stpsv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
-              CblasNonUnit, num_rows, Mdata, y, y_inc);
-}
-inline void cblas_Xtpsv(MatrixTransposeType trans, const double *Mdata,
-                        const int num_rows, double *y, const int y_inc) {
-  cblas_dtpsv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
-              CblasNonUnit, num_rows, Mdata, y, y_inc);
-}
-
-// x = alpha * M * y + beta * x
-inline void cblas_Xspmv(MatrixIndexT dim, float alpha, const float *Mdata,
-                        const float *ydata, MatrixIndexT ystride,
-                        float beta, float *xdata, MatrixIndexT xstride) {
-  cblas_sspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata,
-              ydata, ystride, beta, xdata, xstride);
-}
-inline void cblas_Xspmv(MatrixIndexT dim, double alpha, const double *Mdata,
-                        const double *ydata, MatrixIndexT ystride,
-                        double beta, double *xdata, MatrixIndexT xstride) {
-  cblas_dspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata,
-              ydata, ystride, beta, xdata, xstride);
-}
-
-// Implements  A += alpha * (x y'  + y x'); A is symmetric matrix.
-inline void cblas_Xspr2(MatrixIndexT dim, float alpha, const float *Xdata,
-                        MatrixIndexT incX, const float *Ydata, MatrixIndexT incY,
-                          float *Adata) {
-  cblas_sspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata,
-              incX, Ydata, incY, Adata);
-}
-inline void cblas_Xspr2(MatrixIndexT dim, double alpha, const double *Xdata,
-                        MatrixIndexT incX, const double *Ydata, MatrixIndexT incY,
-                        double *Adata) {
-  cblas_dspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata,
-              incX, Ydata, incY, Adata);
-}
-
-// Implements  A += alpha * (x x'); A is symmetric matrix.
-inline void cblas_Xspr(MatrixIndexT dim, float alpha, const float *Xdata,
-                       MatrixIndexT incX, float *Adata) {
-  cblas_sspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata);
-}
-inline void cblas_Xspr(MatrixIndexT dim, double alpha, const double *Xdata,
-                       MatrixIndexT incX, double *Adata) {
-  cblas_dspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata);
-}
-
-// sgemv,dgemv: y = alpha M x + beta y.
-inline void cblas_Xgemv(MatrixTransposeType trans, MatrixIndexT num_rows,
-                        MatrixIndexT num_cols, float alpha, const float *Mdata,
-                        MatrixIndexT stride, const float *xdata,
-                        MatrixIndexT incX, float beta, float *ydata, MatrixIndexT incY) {
-  cblas_sgemv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
-              num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY);
-}
-inline void cblas_Xgemv(MatrixTransposeType trans, MatrixIndexT num_rows,
-                        MatrixIndexT num_cols, double alpha, const double *Mdata,
-                        MatrixIndexT stride, const double *xdata,
-                        MatrixIndexT incX, double beta, double *ydata, MatrixIndexT incY) {
-  cblas_dgemv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
-              num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY);
-}
-
-// sgbmv, dgbmv: y = alpha M x + beta * y.
-inline void cblas_Xgbmv(MatrixTransposeType trans, MatrixIndexT num_rows,
-                        MatrixIndexT num_cols, MatrixIndexT num_below,
-                        MatrixIndexT num_above, float alpha, const float *Mdata,
-                        MatrixIndexT stride, const float *xdata,
-                        MatrixIndexT incX, float beta, float *ydata, MatrixIndexT incY) {
-  cblas_sgbmv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
-              num_cols, num_below, num_above, alpha, Mdata, stride, xdata,
-              incX, beta, ydata, incY);
-}
-inline void cblas_Xgbmv(MatrixTransposeType trans, MatrixIndexT num_rows,
-                        MatrixIndexT num_cols, MatrixIndexT num_below,
-                        MatrixIndexT num_above, double alpha, const double *Mdata,
-                        MatrixIndexT stride, const double *xdata,
-                        MatrixIndexT incX, double beta, double *ydata, MatrixIndexT incY) {
-  cblas_dgbmv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
-              num_cols, num_below, num_above, alpha, Mdata, stride, xdata,
-              incX, beta, ydata, incY);
-}
-
-
-template<typename Real>  // gemv variant that skips zero entries of x: y = alpha op(M) x + beta y.
-inline void Xgemv_sparsevec(MatrixTransposeType trans, MatrixIndexT num_rows,
-                            MatrixIndexT num_cols, Real alpha, const Real *Mdata,
-                            MatrixIndexT stride, const Real *xdata,
-                            MatrixIndexT incX, Real beta, Real *ydata,
-                            MatrixIndexT incY) {
-  if (trans == kNoTrans) {  // y (num_rows elements) accumulates scaled columns of M
-    if (beta != 1.0) cblas_Xscal(num_rows, beta, ydata, incY);  // y *= beta first
-    for (MatrixIndexT i = 0; i < num_cols; i++) {
-      Real x_i = xdata[i * incX];
-      if (x_i == 0.0) continue;  // zero entries contribute nothing, so skip the axpy
-      // Add to ydata, the i'th column of M, times alpha * x_i
-      cblas_Xaxpy(num_rows, x_i * alpha, Mdata + i, stride, ydata, incY);
-    }    
-  } else {  // transposed case: y (num_cols elements) accumulates scaled rows of M
-    if (beta != 1.0) cblas_Xscal(num_cols, beta, ydata, incY);  // y *= beta first
-    for (MatrixIndexT i = 0; i < num_rows; i++) {
-      Real x_i = xdata[i * incX];
-      if (x_i == 0.0) continue;  // zero entries contribute nothing, so skip the axpy
-      // Add to ydata, the i'th row of M, times alpha * x_i
-      cblas_Xaxpy(num_cols, x_i * alpha,
-                  Mdata + (i * stride), 1, ydata, incY);
-    }
-  }
-}
-
-inline void cblas_Xgemm(const float alpha,
-                        MatrixTransposeType transA,
-                        const float *Adata,
-                        MatrixIndexT a_num_rows, MatrixIndexT a_num_cols, MatrixIndexT a_stride,
-                        MatrixTransposeType transB, 
-                        const float *Bdata, MatrixIndexT b_stride,
-                        const float beta,
-                        float *Mdata, 
-                        MatrixIndexT num_rows, MatrixIndexT num_cols,MatrixIndexT stride) {
-  cblas_sgemm(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(transA), 
-              static_cast<CBLAS_TRANSPOSE>(transB),
-              num_rows, num_cols, transA == kNoTrans ? a_num_cols : a_num_rows,
-              alpha, Adata, a_stride, Bdata, b_stride,
-              beta, Mdata, stride); 
-}
-inline void cblas_Xgemm(const double alpha,
-                        MatrixTransposeType transA,
-                        const double *Adata,
-                        MatrixIndexT a_num_rows, MatrixIndexT a_num_cols, MatrixIndexT a_stride,
-                        MatrixTransposeType transB, 
-                        const double *Bdata, MatrixIndexT b_stride,
-                        const double beta,
-                        double *Mdata, 
-                        MatrixIndexT num_rows, MatrixIndexT num_cols,MatrixIndexT stride) {
-  cblas_dgemm(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(transA), 
-              static_cast<CBLAS_TRANSPOSE>(transB),
-              num_rows, num_cols, transA == kNoTrans ? a_num_cols : a_num_rows,
-              alpha, Adata, a_stride, Bdata, b_stride,
-              beta, Mdata, stride); 
-}
-
-
-inline void cblas_Xsymm(const float alpha,
-                        MatrixIndexT sz,
-                        const float *Adata,MatrixIndexT a_stride,
-                        const float *Bdata,MatrixIndexT b_stride,
-                        const float beta,
-                        float *Mdata, MatrixIndexT stride) {
-  cblas_ssymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata,
-              a_stride, Bdata, b_stride, beta, Mdata, stride);
-}
-inline void cblas_Xsymm(const double alpha,
-                        MatrixIndexT sz,
-                        const double *Adata,MatrixIndexT a_stride,
-                        const double *Bdata,MatrixIndexT b_stride,
-                        const double beta,
-                        double *Mdata, MatrixIndexT stride) {
-  cblas_dsymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata,
-              a_stride, Bdata, b_stride, beta, Mdata, stride);
-}
-// ger: M += alpha x y^T; incX and incY are the element strides of x and y.
-inline void cblas_Xger(MatrixIndexT num_rows, MatrixIndexT num_cols, float alpha,
-                       const float *xdata, MatrixIndexT incX, const float *ydata,
-                       MatrixIndexT incY, float *Mdata, MatrixIndexT stride) {
-  cblas_sger(CblasRowMajor, num_rows, num_cols, alpha, xdata, incX, ydata, incY,
-             Mdata, stride);  // honor the caller's increments rather than assuming unit stride
-}
-inline void cblas_Xger(MatrixIndexT num_rows, MatrixIndexT num_cols, double alpha,
-                       const double *xdata, MatrixIndexT incX, const double *ydata,
-                       MatrixIndexT incY, double *Mdata, MatrixIndexT stride) {
-  cblas_dger(CblasRowMajor, num_rows, num_cols, alpha, xdata, incX, ydata, incY,
-             Mdata, stride);  // honor the caller's increments rather than assuming unit stride
-}
-
-// syrk: symmetric rank-k update.
-// if trans==kNoTrans, then C = alpha A A^T + beta C
-// else C = alpha A^T A + beta C.
-// note: dim_c is dim(C), other_dim_a is the "other" dimension of A, i.e.
-// num-cols(A) if kNoTrans, or num-rows(A) if kTrans.
-// We only need the row-major and lower-triangular option of this, and this
-// is hard-coded.
-inline void cblas_Xsyrk (
-    const MatrixTransposeType trans, const MatrixIndexT dim_c,
-    const MatrixIndexT other_dim_a, const float alpha, const float *A,
-    const MatrixIndexT a_stride, const float beta, float *C,
-    const MatrixIndexT c_stride) {
-  cblas_ssyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
-              dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride);
-}
-
-inline void cblas_Xsyrk(
-    const MatrixTransposeType trans, const MatrixIndexT dim_c,
-    const MatrixIndexT other_dim_a, const double alpha, const double *A,
-    const MatrixIndexT a_stride, const double beta, double *C,
-    const MatrixIndexT c_stride) {
-  cblas_dsyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
-              dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride);
-}
-
-/// Matrix-vector multiply using a symmetric banded matrix (sbmv); this wrapper
-/// always uses a band of width 1 (k = 0, lda = 1), i.e. A is treated as a
-/// diagonal matrix, so it performs elementwise multiplication.  The omitted
-/// BLAS arguments (k, lda and the vector increments) are hard-coded below.
-inline void cblas_Xsbmv1(
-    const MatrixIndexT dim,
-    const double *A,
-    const double alpha,
-    const double *x,
-    const double beta,
-    double *y) {
-  cblas_dsbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
-              1, x, 1, beta, y, 1);
-}
-
-inline void cblas_Xsbmv1(
-    const MatrixIndexT dim,
-    const float *A,
-    const float alpha,
-    const float *x,
-    const float beta,
-    float *y) {
-  cblas_ssbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
-              1, x, 1, beta, y, 1);
-}
-
-/// Elementwise product b *= a over 'dim' doubles.  This is not really a CBLAS
-/// wrapper, as CBLAS does not have this operation; in future we could extend this somehow.
-inline void mul_elements(
-    const MatrixIndexT dim,
-    const double *a,
-    double *b) { // does b *= a, elementwise.
-  double c1, c2, c3, c4;
-  MatrixIndexT i;
-  for (i = 0; i + 4 <= dim; i += 4) {  // main loop, manually unrolled by four
-    c1 = a[i] * b[i];
-    c2 = a[i+1] * b[i+1];
-    c3 = a[i+2] * b[i+2];
-    c4 = a[i+3] * b[i+3];
-    b[i] = c1;
-    b[i+1] = c2;
-    b[i+2] = c3;
-    b[i+3] = c4;
-  }
-  for (; i < dim; i++)  // tail: the remaining (fewer than four) elements
-    b[i] *= a[i];
-}
-
-inline void mul_elements(  // single-precision elementwise product over 'dim' floats
-    const MatrixIndexT dim,
-    const float *a,
-    float *b) { // does b *= a, elementwise.
-  float c1, c2, c3, c4;
-  MatrixIndexT i;
-  for (i = 0; i + 4 <= dim; i += 4) {  // main loop, manually unrolled by four
-    c1 = a[i] * b[i];
-    c2 = a[i+1] * b[i+1];
-    c3 = a[i+2] * b[i+2];
-    c4 = a[i+3] * b[i+3];
-    b[i] = c1;
-    b[i+1] = c2;
-    b[i+2] = c3;
-    b[i+3] = c4;
-  }
-  for (; i < dim; i++)  // tail: the remaining (fewer than four) elements
-    b[i] *= a[i];
-}
-
-
-
-// add clapack here
-#if !defined(HAVE_ATLAS)
-inline void clapack_Xtptri(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *result) {
-  stptri_(const_cast<char *>("U"), const_cast<char *>("N"), num_rows, Mdata, result);
-}
-inline void clapack_Xtptri(KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *result) {
-  dtptri_(const_cast<char *>("U"), const_cast<char *>("N"), num_rows, Mdata, result);
-}
-// 
-inline void clapack_Xgetrf2(KaldiBlasInt *num_rows, KaldiBlasInt *num_cols, 
-                            float *Mdata, KaldiBlasInt *stride, KaldiBlasInt *pivot, 
-                            KaldiBlasInt *result) {
-  sgetrf_(num_rows, num_cols, Mdata, stride, pivot, result);
-}
-inline void clapack_Xgetrf2(KaldiBlasInt *num_rows, KaldiBlasInt *num_cols, 
-                            double *Mdata, KaldiBlasInt *stride, KaldiBlasInt *pivot, 
-                            KaldiBlasInt *result) {
-  dgetrf_(num_rows, num_cols, Mdata, stride, pivot, result);
-}
-
-// 
-inline void clapack_Xgetri2(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *stride,
-                           KaldiBlasInt *pivot, float *p_work, 
-                           KaldiBlasInt *l_work, KaldiBlasInt *result) {
-  sgetri_(num_rows, Mdata, stride, pivot, p_work, l_work, result);
-}
-inline void clapack_Xgetri2(KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *stride,
-                           KaldiBlasInt *pivot, double *p_work, 
-                           KaldiBlasInt *l_work, KaldiBlasInt *result) {
-  dgetri_(num_rows, Mdata, stride, pivot, p_work, l_work, result);
-}
-//
-inline void clapack_Xgesvd(char *v, char *u, KaldiBlasInt *num_cols,
-                           KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *stride,
-                           float *sv, float *Vdata, KaldiBlasInt *vstride,
-                           float *Udata, KaldiBlasInt *ustride, float *p_work,
-                           KaldiBlasInt *l_work, KaldiBlasInt *result) {
-  sgesvd_(v, u,
-          num_cols, num_rows, Mdata, stride,
-          sv, Vdata, vstride, Udata, ustride, 
-          p_work, l_work, result); 
-}
-inline void clapack_Xgesvd(char *v, char *u, KaldiBlasInt *num_cols,
-                           KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *stride,
-                           double *sv, double *Vdata, KaldiBlasInt *vstride,
-                           double *Udata, KaldiBlasInt *ustride, double *p_work,
-                           KaldiBlasInt *l_work, KaldiBlasInt *result) {
-  dgesvd_(v, u,
-          num_cols, num_rows, Mdata, stride,
-          sv, Vdata, vstride, Udata, ustride,
-          p_work, l_work, result); 
-}
-//
-void inline clapack_Xsptri(KaldiBlasInt *num_rows, float *Mdata, 
-                           KaldiBlasInt *ipiv, float *work, KaldiBlasInt *result) {
-  ssptri_(const_cast<char *>("U"), num_rows, Mdata, ipiv, work, result);
-}
-void inline clapack_Xsptri(KaldiBlasInt *num_rows, double *Mdata, 
-                           KaldiBlasInt *ipiv, double *work, KaldiBlasInt *result) {
-  dsptri_(const_cast<char *>("U"), num_rows, Mdata, ipiv, work, result);
-}
-//
-void inline clapack_Xsptrf(KaldiBlasInt *num_rows, float *Mdata,
-                           KaldiBlasInt *ipiv, KaldiBlasInt *result) {
-  ssptrf_(const_cast<char *>("U"), num_rows, Mdata, ipiv, result);
-}
-void inline clapack_Xsptrf(KaldiBlasInt *num_rows, double *Mdata,
-                           KaldiBlasInt *ipiv, KaldiBlasInt *result) {
-  dsptrf_(const_cast<char *>("U"), num_rows, Mdata, ipiv, result);
-}
-#else
-inline void clapack_Xgetrf(MatrixIndexT num_rows, MatrixIndexT num_cols,
-                           float *Mdata, MatrixIndexT stride, 
-                           int *pivot, int *result) {
-  *result = clapack_sgetrf(CblasColMajor, num_rows, num_cols,
-                              Mdata, stride, pivot);
-}
-
-inline void clapack_Xgetrf(MatrixIndexT num_rows, MatrixIndexT num_cols,
-                           double *Mdata, MatrixIndexT stride, 
-                           int *pivot, int *result) {
-  *result = clapack_dgetrf(CblasColMajor, num_rows, num_cols,
-                              Mdata, stride, pivot);
-}
-//
-inline int clapack_Xtrtri(int num_rows, float *Mdata, MatrixIndexT stride) {
-  return  clapack_strtri(CblasColMajor, CblasUpper, CblasNonUnit, num_rows,
-                              Mdata, stride);
-}
-
-inline int clapack_Xtrtri(int num_rows, double *Mdata, MatrixIndexT stride) {
-  return  clapack_dtrtri(CblasColMajor, CblasUpper, CblasNonUnit, num_rows,
-                              Mdata, stride);
-}
-//
-inline void clapack_Xgetri(MatrixIndexT num_rows, float *Mdata, MatrixIndexT stride,
-                      int *pivot, int *result) {
-  *result = clapack_sgetri(CblasColMajor, num_rows, Mdata, stride, pivot);
-}
-inline void clapack_Xgetri(MatrixIndexT num_rows, double *Mdata, MatrixIndexT stride,
-                      int *pivot, int *result) {
-  *result = clapack_dgetri(CblasColMajor, num_rows, Mdata, stride, pivot);
-}
-#endif
-
-}
-// namespace kaldi
-
-#endif
--- a/speechx/speechx/kaldi/matrix/compressed-matrix.cc
+++ b/speechx/speechx/kaldi/matrix/compressed-matrix.cc
@ -1,876 +0,0 @@
-// matrix/compressed-matrix.cc
-
-// Copyright 2012    Johns Hopkins University (author: Daniel Povey)
-//                   Frantisek Skala, Wei Shi
-//           2015    Tom Ko
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#include "matrix/compressed-matrix.h"
-#include <algorithm>
-
-namespace kaldi {
-
-// static
-MatrixIndexT CompressedMatrix::DataSize(const GlobalHeader &header) {
-  // Returns the size in bytes of the data: the GlobalHeader plus the format-dependent payload.
-  DataFormat format = static_cast<DataFormat>(header.format);
-  if (format == kOneByteWithColHeaders) {
-    return sizeof(GlobalHeader) +
-        header.num_cols * (sizeof(PerColHeader) + header.num_rows);  // per column: one header + one byte per row
-  } else if (format == kTwoByte) {
-    return sizeof(GlobalHeader) +
-        2 * header.num_rows * header.num_cols;  // two bytes per element
-  } else {
-    KALDI_ASSERT(format == kOneByte);
-    return sizeof(GlobalHeader) +
-        header.num_rows * header.num_cols;  // one byte per element
-  }
-}
-
-// Scales all elements of the matrix by alpha, by scaling the float fields
-// (min_value and range) of the GlobalHeader.
-void CompressedMatrix::Scale(float alpha) {
-  if (data_ != NULL) {  // a NULL data_ means there is nothing to scale
-    GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
-    // Only the floating point values of the GlobalHeader are scaled here;
-    // all stored integers are left the same.
-    h->min_value *= alpha;
-    h->range *= alpha;
-  }
-}
-
-template<typename Real>  // static inline; fills *header (format, dims, min_value, range) for 'mat'.
-void CompressedMatrix::ComputeGlobalHeader(
-    const MatrixBase<Real> &mat, CompressionMethod method,
-    GlobalHeader *header) {
-  if (method == kAutomaticMethod) {  // resolve the automatic choice from the row count
-    if (mat.NumRows() > 8) method = kSpeechFeature;
-    else method = kTwoByteAuto;
-  }
-
-  switch (method) {  // map the compression method to the storage format
-    case kSpeechFeature:
-      header->format = static_cast<int32>(kOneByteWithColHeaders);  // 1.
-      break;
-    case kTwoByteAuto: case kTwoByteSignedInteger:
-      header->format = static_cast<int32>(kTwoByte);  // 2.
-      break;
-    case kOneByteAuto: case kOneByteUnsignedInteger: case kOneByteZeroOne:
-      header->format = static_cast<int32>(kOneByte);  // 3.
-      break;
-    default:
-      KALDI_ERR << "Invalid compression type: "
-                << static_cast<int32>(method);
-  }
-
-  header->num_rows = mat.NumRows();
-  header->num_cols = mat.NumCols();
-
-  // Now compute 'min_value' and 'range'.
-  switch (method) {
-    case kSpeechFeature: case kTwoByteAuto: case kOneByteAuto: {
-      float min_value = mat.Min(), max_value = mat.Max();
-      // ensure that max_value is strictly greater than min_value, even if matrix is
-      // constant; this avoids crashes in ComputeColHeader when compressing speech
-      // features.
-      if (max_value == min_value)
-        max_value = min_value + (1.0 + fabs(min_value));
-      KALDI_ASSERT(min_value - min_value == 0 &&
-                   max_value - max_value == 0 &&
-                   "Cannot compress a matrix with Nan's or Inf's");
-
-      header->min_value = min_value;
-      header->range = max_value - min_value;
-
-      // max_value was forced above to differ from min_value, so their
-      // difference should be nonzero.
-      KALDI_ASSERT(header->range > 0.0);
-      break;
-    }
-    case kTwoByteSignedInteger: {  // fixed range, independent of the matrix contents
-      header->min_value = -32768.0;
-      header->range = 65535.0;
-      break;
-    }
-    case kOneByteUnsignedInteger: {  // fixed range, independent of the matrix contents
-      header->min_value = 0.0;
-      header->range = 255.0;
-      break;
-    }
-    case kOneByteZeroOne: {  // fixed range, independent of the matrix contents
-      header->min_value = 0.0;
-      header->range = 1.0;
-      break;
-    }
-    default:
-      KALDI_ERR << "Unknown compression method = "
-                << static_cast<int32>(method);
-  }
-  KALDI_COMPILE_TIME_ASSERT(sizeof(*header) == 20);  // otherwise
-  // something weird is happening and our code probably won't work or
-  // won't be robust across platforms.
-}
-
-template<typename Real>  // Replaces the current contents with a compressed copy of 'mat'.
-void CompressedMatrix::CopyFromMat(
-    const MatrixBase<Real> &mat, CompressionMethod method) {
-  if (data_ != NULL) {
-    delete [] static_cast<float*>(data_);  // use delete [] because the buffer was allocated with new float[]
-    data_ = NULL;
-  }
-  if (mat.NumRows() == 0) { return; }  // Zero-size matrix stored as zero pointer.
-
-
-  GlobalHeader global_header;
-  ComputeGlobalHeader(mat, method, &global_header);
-
-  int32 data_size = DataSize(global_header);  // total bytes: global header plus payload
-
-  data_ = AllocateData(data_size);
-
-  *(reinterpret_cast<GlobalHeader*>(data_)) = global_header;  // the header sits at the start of the buffer
-
-  DataFormat format = static_cast<DataFormat>(global_header.format);
-  if (format == kOneByteWithColHeaders) {
-    PerColHeader *header_data =
-        reinterpret_cast<PerColHeader*>(static_cast<char*>(data_) +
-                                        sizeof(GlobalHeader));
-    uint8 *byte_data =
-        reinterpret_cast<uint8*>(header_data + global_header.num_cols);  // bytes follow the num_cols column headers
-
-    const Real *matrix_data = mat.Data();
-
-    for (int32 col = 0; col < global_header.num_cols; col++) {  // compress one column at a time
-      CompressColumn(global_header,
-                     matrix_data + col, mat.Stride(),
-                     global_header.num_rows,
-                     header_data, byte_data);
-      header_data++;
-      byte_data += global_header.num_rows;  // each column occupies num_rows bytes
-    }
-  } else if (format == kTwoByte) {
-    uint16 *data = reinterpret_cast<uint16*>(static_cast<char*>(data_) +
-                                             sizeof(GlobalHeader));
-    int32 num_rows = mat.NumRows(), num_cols = mat.NumCols();
-    for (int32 r = 0; r < num_rows; r++) {  // written row by row, one uint16 per element
-      const Real *row_data = mat.RowData(r);
-      for (int32 c = 0; c < num_cols; c++)
-        data[c] = FloatToUint16(global_header, row_data[c]);
-      data += num_cols;
-    }
-  } else {
-    KALDI_ASSERT(format == kOneByte);
-    uint8 *data = reinterpret_cast<uint8*>(static_cast<char*>(data_) +
-                                           sizeof(GlobalHeader));
-    int32 num_rows = mat.NumRows(), num_cols = mat.NumCols();
-    for (int32 r = 0; r < num_rows; r++) {  // written row by row, one uint8 per element
-      const Real *row_data = mat.RowData(r);
-      for (int32 c = 0; c < num_cols; c++)
-        data[c] = FloatToUint8(global_header, row_data[c]);
-      data += num_cols;
-    }
-  }
-}
-
-// Instantiate the template for float and double.
-template
-void CompressedMatrix::CopyFromMat(const MatrixBase<float> &mat,
-                                   CompressionMethod method);
-
-template
-void CompressedMatrix::CopyFromMat(const MatrixBase<double> &mat,
-                                   CompressionMethod method);
-
-
-CompressedMatrix::CompressedMatrix(
-    const CompressedMatrix &cmat,
-    const MatrixIndexT row_offset,
-    const MatrixIndexT num_rows,
-    const MatrixIndexT col_offset,
-    const MatrixIndexT num_cols,
-    bool allow_padding): data_(NULL) {
-  int32 old_num_rows = cmat.NumRows(), old_num_cols = cmat.NumCols();
-
-  if (old_num_rows == 0) {
-    KALDI_ASSERT(num_rows == 0 && num_cols == 0);
-    // The empty matrix is stored as a zero pointer.
-    return;
-  }
-
-  KALDI_ASSERT(row_offset < old_num_rows);
-  KALDI_ASSERT(col_offset < old_num_cols);
-  KALDI_ASSERT(row_offset >= 0 || allow_padding);
-  KALDI_ASSERT(col_offset >= 0);
-  KALDI_ASSERT(row_offset + num_rows <= old_num_rows || allow_padding);
-  KALDI_ASSERT(col_offset + num_cols <= old_num_cols);
-
-  if (num_rows == 0 || num_cols == 0) { return; }
-
-  bool padding_is_used = (row_offset < 0 ||
-                          row_offset + num_rows > old_num_rows);
-
-  GlobalHeader new_global_header;
-  KALDI_COMPILE_TIME_ASSERT(sizeof(new_global_header) == 20);
-
-  GlobalHeader *old_global_header = reinterpret_cast<GlobalHeader*>(cmat.Data());
-
-  new_global_header = *old_global_header;
-  new_global_header.num_cols = num_cols;
-  new_global_header.num_rows = num_rows;
-
-  // We don't switch format from 1 -> 2 (in case of size reduction) yet; if this
-  // is needed, we will do this below by creating a temporary Matrix.
-  new_global_header.format = old_global_header->format;
-
-  data_ = AllocateData(DataSize(new_global_header));  // allocate memory
-  *(reinterpret_cast<GlobalHeader*>(data_)) = new_global_header;
-
-
-  DataFormat format = static_cast<DataFormat>(old_global_header->format);
-  if (format == kOneByteWithColHeaders) {
-    PerColHeader *old_per_col_header =
-        reinterpret_cast<PerColHeader*>(old_global_header + 1);
-    uint8 *old_byte_data =
-        reinterpret_cast<uint8*>(old_per_col_header +
-                                 old_global_header->num_cols);
-    PerColHeader *new_per_col_header =
-        reinterpret_cast<PerColHeader*>(
-            reinterpret_cast<GlobalHeader*>(data_) + 1);
-
-    memcpy(new_per_col_header, old_per_col_header + col_offset,
-           sizeof(PerColHeader) * num_cols);
-
-    uint8 *new_byte_data =
-        reinterpret_cast<uint8*>(new_per_col_header + num_cols);
-    if (!padding_is_used) {
-      uint8 *old_start_of_subcol =
-          old_byte_data + row_offset + (col_offset * old_num_rows),
-          *new_start_of_col = new_byte_data;
-      for (int32 i = 0; i < num_cols; i++) {
-        memcpy(new_start_of_col, old_start_of_subcol, num_rows);
-        new_start_of_col += num_rows;
-        old_start_of_subcol += old_num_rows;
-      }
-    } else {
-      uint8 *old_start_of_col =
-          old_byte_data + (col_offset * old_num_rows),
-          *new_start_of_col = new_byte_data;
-      for (int32 i = 0; i < num_cols; i++) {
-
-        for (int32 j = 0; j < num_rows; j++) {
-          int32 old_j = j + row_offset;
-          if (old_j < 0) old_j = 0;
-          else if (old_j >= old_num_rows) old_j = old_num_rows - 1;
-          new_start_of_col[j] = old_start_of_col[old_j];
-        }
-        new_start_of_col += num_rows;
-        old_start_of_col += old_num_rows;
-      }
-    }
-  } else if (format == kTwoByte) {
-    const uint16 *old_data =
-        reinterpret_cast<const uint16*>(old_global_header + 1);
-    uint16 *new_row_data =
-        reinterpret_cast<uint16*>(reinterpret_cast<GlobalHeader*>(data_) + 1);
-
-    for (int32 row = 0; row < num_rows; row++) {
-      int32 old_row = row + row_offset;
-      // The next two lines are only relevant if padding_is_used.
-      if (old_row < 0) old_row = 0;
-      else if (old_row >= old_num_rows) old_row = old_num_rows - 1;
-      const uint16 *old_row_data =
-          old_data + col_offset + (old_num_cols * old_row);
-      memcpy(new_row_data, old_row_data, sizeof(uint16) * num_cols);
-      new_row_data += num_cols;
-    }
-  } else {
-    KALDI_ASSERT(format == kOneByte);
-    const uint8 *old_data =
-        reinterpret_cast<const uint8*>(old_global_header + 1);
-    uint8 *new_row_data =
-        reinterpret_cast<uint8*>(reinterpret_cast<GlobalHeader*>(data_) + 1);
-
-    for (int32 row = 0; row < num_rows; row++) {
-      int32 old_row = row + row_offset;
-      // The next two lines are only relevant if padding_is_used.
-      if (old_row < 0) old_row = 0;
-      else if (old_row >= old_num_rows) old_row = old_num_rows - 1;
-      const uint8 *old_row_data =
-          old_data + col_offset + (old_num_cols * old_row);
-      memcpy(new_row_data, old_row_data, sizeof(uint8) * num_cols);
-      new_row_data += num_cols;
-    }
-  }
-
-  if (num_rows < 8 && format == kOneByteWithColHeaders) {
-    // format was 1 but we want it to be 2 -> create a temporary
-    // Matrix (uncompress), re-compress, and swap.
-    // This gives us almost exact reconstruction while saving
-    // memory (the elements take more space but there will be
-    // no per-column headers).
-    Matrix<float> temp(this->NumRows(), this->NumCols(),
-                       kUndefined);
-    this->CopyToMat(&temp);
-    CompressedMatrix temp_cmat(temp, kTwoByteAuto);
-    this->Swap(&temp_cmat);
-  }
-}
-
-
-template<typename Real>
-CompressedMatrix &CompressedMatrix::operator =(const MatrixBase<Real> &mat) {
-  this->CopyFromMat(mat);
-  return *this;
-}
-
-// Instantiate the template for float and double.
-template
-CompressedMatrix& CompressedMatrix::operator =(const MatrixBase<float> &mat);
-
-template
-CompressedMatrix& CompressedMatrix::operator =(const MatrixBase<double> &mat);
-
-inline uint16 CompressedMatrix::FloatToUint16(
-    const GlobalHeader &global_header,
-    float value) {
-  float f = (value - global_header.min_value) /
-      global_header.range;
-  if (f > 1.0) f = 1.0;  // Note: this should not happen.
-  if (f < 0.0) f = 0.0;  // Note: this should not happen.
-  return static_cast<int>(f * 65535 + 0.499);  // + 0.499 is to
-  // round to closest int; avoids bias.
-}
-
-
-inline uint8 CompressedMatrix::FloatToUint8(
-    const GlobalHeader &global_header,
-    float value) {
-  float f = (value - global_header.min_value) /
-      global_header.range;
-  if (f > 1.0) f = 1.0;  // Note: this should not happen.
-  if (f < 0.0) f = 0.0;  // Note: this should not happen.
-  return static_cast<int>(f * 255 + 0.499);  // + 0.499 is to
-  // round to closest int; avoids bias.
-}
-
-
-inline float CompressedMatrix::Uint16ToFloat(
-    const GlobalHeader &global_header,
-    uint16 value) {
-  // the constant 1.52590218966964e-05 is 1/65535.
-  return global_header.min_value
-      + global_header.range * 1.52590218966964e-05F * value;
-}
-
-template<typename Real>  // static
-void CompressedMatrix::ComputeColHeader(
-    const GlobalHeader &global_header,
-    const Real *data, MatrixIndexT stride,
-    int32 num_rows, CompressedMatrix::PerColHeader *header) {
-  KALDI_ASSERT(num_rows > 0);
-  std::vector<Real> sdata(num_rows); // the sorted data.
-  for (size_t i = 0, size = sdata.size(); i < size; i++)
-    sdata[i] = data[i*stride];
-
-  if (num_rows >= 5) {
-    int quarter_nr = num_rows/4;
-    // std::sort(sdata.begin(), sdata.end());
-    // The elements at positions 0, quarter_nr,
-    // 3*quarter_nr, and num_rows-1 need to be in sorted order.
-    std::nth_element(sdata.begin(), sdata.begin() + quarter_nr, sdata.end());
-    // Now, sdata.begin() + quarter_nr contains the element that would appear
-    // in sorted order, in that position.
-    std::nth_element(sdata.begin(), sdata.begin(), sdata.begin() + quarter_nr);
-    // Now, sdata.begin() and sdata.begin() + quarter_nr contain the elements
-    // that would appear at those positions in sorted order.
-    std::nth_element(sdata.begin() + quarter_nr + 1,
-                     sdata.begin() + (3*quarter_nr), sdata.end());
-    // Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() +
-    // 3*quarter_nr, contain the elements that would appear at those positions
-    // in sorted order.
-    std::nth_element(sdata.begin() + (3*quarter_nr) + 1, sdata.end() - 1,
-                     sdata.end());
-    // Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() +
-    // 3*quarter_nr, and sdata.end() - 1, contain the elements that would appear
-    // at those positions in sorted order.
-
-    header->percentile_0 =
-        std::min<uint16>(FloatToUint16(global_header, sdata[0]), 65532);
-    header->percentile_25 =
-        std::min<uint16>(
-            std::max<uint16>(
-                FloatToUint16(global_header, sdata[quarter_nr]),
-                header->percentile_0 + static_cast<uint16>(1)), 65533);
-    header->percentile_75 =
-        std::min<uint16>(
-            std::max<uint16>(
-                FloatToUint16(global_header, sdata[3*quarter_nr]),
-                header->percentile_25 + static_cast<uint16>(1)), 65534);
-    header->percentile_100 = std::max<uint16>(
-        FloatToUint16(global_header, sdata[num_rows-1]),
-        header->percentile_75 + static_cast<uint16>(1));
-
-  } else {  // handle this pathological case.
-    std::sort(sdata.begin(), sdata.end());
-    // Note: we know num_rows is at least 1.
-    header->percentile_0 =
-        std::min<uint16>(FloatToUint16(global_header, sdata[0]),
-                         65532);
-    if (num_rows > 1)
-      header->percentile_25 =
-          std::min<uint16>(
-              std::max<uint16>(FloatToUint16(global_header, sdata[1]),
-                               header->percentile_0 + 1), 65533);
-    else
-      header->percentile_25 = header->percentile_0 + 1;
-    if (num_rows > 2)
-      header->percentile_75 =
-          std::min<uint16>(
-              std::max<uint16>(FloatToUint16(global_header, sdata[2]),
-                               header->percentile_25 + 1), 65534);
-    else
-      header->percentile_75 = header->percentile_25 + 1;
-    if (num_rows > 3)
-      header->percentile_100 =
-          std::max<uint16>(FloatToUint16(global_header, sdata[3]),
-                           header->percentile_75 + 1);
-    else
-      header->percentile_100 = header->percentile_75 + 1;
-  }
-}
-
-// static
-inline uint8 CompressedMatrix::FloatToChar(
-    float p0, float p25, float p75, float p100,
-    float value) {
-  int ans;
-  if (value < p25) {  // range [ p0, p25 ) covered by
-    // characters 0 .. 64.  We round to the closest int.
-    float f = (value - p0) / (p25 - p0);
-    ans = static_cast<int>(f * 64 + 0.5);
-    // Note: the checks on the next two lines
-    // are necessary in pathological cases when all the elements in a row
-    // are the same and the percentile_* values are separated by one.
-    if (ans < 0) ans = 0;
-    if (ans > 64) ans = 64;
-  } else if (value < p75) {  // range [ p25, p75 )covered
-    // by characters 64 .. 192.  We round to the closest int.
-    float f = (value - p25) / (p75 - p25);
-    ans = 64 + static_cast<int>(f * 128 + 0.5);
-    if (ans < 64) ans = 64;
-    if (ans > 192) ans = 192;
-  } else {  // range [ p75, p100 ] covered by
-    // characters 192 .. 255.  Note: this last range
-    // has fewer characters than the left range, because
-    // we go up to 255, not 256.
-    float f = (value - p75) / (p100 - p75);
-    ans = 192 + static_cast<int>(f * 63 + 0.5);
-    if (ans < 192) ans = 192;
-    if (ans > 255) ans = 255;
-  }
-  return static_cast<uint8>(ans);
-}
-
-
-// static
-inline float CompressedMatrix::CharToFloat(
-    float p0, float p25, float p75, float p100,
-    uint8 value) {
-  if (value <= 64) {
-    return p0 + (p25 - p0) * value * (1/64.0);
-  } else if (value <= 192) {
-    return p25 + (p75 - p25) * (value - 64) * (1/128.0);
-  } else {
-    return p75 + (p100 - p75) * (value - 192) * (1/63.0);
-  }
-}
-
-
-template<typename Real>  // static
-void CompressedMatrix::CompressColumn(
-    const GlobalHeader &global_header,
-    const Real *data, MatrixIndexT stride,
-    int32 num_rows, CompressedMatrix::PerColHeader *header,
-    uint8 *byte_data) {
-  ComputeColHeader(global_header, data, stride,
-                   num_rows, header);
-
-  float p0 = Uint16ToFloat(global_header, header->percentile_0),
-      p25 = Uint16ToFloat(global_header, header->percentile_25),
-      p75 = Uint16ToFloat(global_header, header->percentile_75),
-      p100 = Uint16ToFloat(global_header, header->percentile_100);
-
-  for (int32 i = 0; i < num_rows; i++) {
-    Real this_data = data[i * stride];
-    byte_data[i] = FloatToChar(p0, p25, p75, p100, this_data);
-  }
-}
-
-// static
-void* CompressedMatrix::AllocateData(int32 num_bytes) {
-  KALDI_ASSERT(num_bytes > 0);
-  KALDI_COMPILE_TIME_ASSERT(sizeof(float) == 4);
-  // round size up to nearest number of floats.
-  return reinterpret_cast<void*>(new float[(num_bytes/3) + 4]);
-}
-
-void CompressedMatrix::Write(std::ostream &os, bool binary) const {
-  if (binary) {  // Binary-mode write:
-    if (data_ != NULL) {
-      GlobalHeader &h = *reinterpret_cast<GlobalHeader*>(data_);
-      DataFormat format = static_cast<DataFormat>(h.format);
-      if (format == kOneByteWithColHeaders) {
-        WriteToken(os, binary, "CM");
-      } else if (format == kTwoByte) {
-        WriteToken(os, binary, "CM2");
-      } else if (format == kOneByte) {
-        WriteToken(os, binary, "CM3");
-      }
-      MatrixIndexT size = DataSize(h);  // total size of data in data_
-      // We don't write out the "int32 format", hence the + 4, - 4.
-      os.write(reinterpret_cast<const char*>(data_) + 4, size - 4);
-    } else {  // special case: where data_ == NULL, we treat it as an empty
-      // matrix.
-      WriteToken(os, binary, "CM");
-      GlobalHeader h;
-      h.range = h.min_value = 0.0;
-      h.num_rows = h.num_cols = 0;
-      os.write(reinterpret_cast<const char*>(&h), sizeof(h));
-    }
-  } else {
-    // In text mode, just use the same format as a regular matrix.
-    // This is not compressed.
-    Matrix<BaseFloat> temp_mat(this->NumRows(), this->NumCols(),
-                               kUndefined);
-    this->CopyToMat(&temp_mat);
-    temp_mat.Write(os, binary);
-  }
-  if (os.fail())
-    KALDI_ERR << "Error writing compressed matrix to stream.";
-}
-
-void CompressedMatrix::Read(std::istream &is, bool binary) {
-  if (data_ != NULL) {
-    delete [] (static_cast<float*>(data_));
-    data_ = NULL;
-  }
-  if (binary) {
-    int peekval = Peek(is, binary);
-    if (peekval == 'C') {
-      std::string tok; // Should be CM (format 1) or CM2 (format 2)
-      ReadToken(is, binary, &tok);
-      GlobalHeader h;
-      if (tok == "CM") { h.format = 1; } //  kOneByteWithColHeaders
-      else if (tok == "CM2") { h.format = 2; }  // kTwoByte
-      else if (tok == "CM3") { h.format = 3; }  // kOneByte
-      else {
-        KALDI_ERR << "Unexpected token " << tok << ", expecting CM, CM2 or CM3";
-      }
-      // don't read the "format" -> hence + 4, - 4.
-      is.read(reinterpret_cast<char*>(&h) + 4, sizeof(h) - 4);
-      if (is.fail())
-        KALDI_ERR << "Failed to read header";
-      if (h.num_cols == 0) // empty matrix.
-        return;
-      int32 size = DataSize(h), remaining_size = size - sizeof(GlobalHeader);
-      data_ = AllocateData(size);
-      *(reinterpret_cast<GlobalHeader*>(data_)) = h;
-      is.read(reinterpret_cast<char*>(data_) + sizeof(GlobalHeader),
-              remaining_size);
-    } else {
-      // Assume that what we're reading is a regular Matrix.  This might be the
-      // case if you changed your code, making a Matrix into a CompressedMatrix,
-      // and you want back-compatibility for reading.
-      Matrix<BaseFloat> M;
-      M.Read(is, binary); // This will crash if it was not a Matrix.
-      this->CopyFromMat(M);
-    }
-  } else {  // Text-mode read.  In this case you don't get to
-    // choose the compression type.  Anyway this branch would only
-    // be taken when debugging.
-    Matrix<BaseFloat> temp;
-    temp.Read(is, binary);
-    this->CopyFromMat(temp);
-  }
-  if (is.fail())
-    KALDI_ERR << "Failed to read data.";
-}
-
-template<typename Real>
-void CompressedMatrix::CopyToMat(MatrixBase<Real> *mat,
-                                 MatrixTransposeType trans) const {
-  if (trans == kTrans) {
-    Matrix<Real> temp(this->NumCols(), this->NumRows());
-    CopyToMat(&temp, kNoTrans);
-    mat->CopyFromMat(temp, kTrans);
-    return;
-  }
-
-  if (data_ == NULL) {
-    KALDI_ASSERT(mat->NumRows() == 0);
-    KALDI_ASSERT(mat->NumCols() == 0);
-    return;
-  }
-  GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
-  int32 num_cols = h->num_cols, num_rows = h->num_rows;
-  KALDI_ASSERT(mat->NumRows() == num_rows);
-  KALDI_ASSERT(mat->NumCols() == num_cols);
-
-  DataFormat format = static_cast<DataFormat>(h->format);
-  if (format == kOneByteWithColHeaders) {
-    PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
-    uint8 *byte_data = reinterpret_cast<uint8*>(per_col_header +
-                                                h->num_cols);
-    for (int32 i = 0; i < num_cols; i++, per_col_header++) {
-      float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
-          p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
-          p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
-          p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
-      for (int32 j = 0; j < num_rows; j++, byte_data++) {
-        float f = CharToFloat(p0, p25, p75, p100, *byte_data);
-        (*mat)(j, i) = f;
-      }
-    }
-  } else if (format == kTwoByte) {
-    const uint16 *data = reinterpret_cast<const uint16*>(h + 1);
-    float min_value = h->min_value,
-        increment = h->range * (1.0 / 65535.0);
-    for (int32 i = 0; i < num_rows; i++) {
-      Real *row_data = mat->RowData(i);
-      for (int32 j = 0; j < num_cols; j++)
-        row_data[j] = min_value + data[j] * increment;
-      data += num_cols;
-    }
-  } else {
-    KALDI_ASSERT(format == kOneByte);
-    float min_value = h->min_value, increment = h->range * (1.0 / 255.0);
-
-    const uint8 *data = reinterpret_cast<const uint8*>(h + 1);
-    for (int32 i = 0; i < num_rows; i++) {
-      Real *row_data = mat->RowData(i);
-      for (int32 j = 0; j < num_cols; j++)
-        row_data[j] = min_value + data[j] * increment;
-      data += num_cols;
-    }
-  }
-}
-
-// Instantiate the template for float and double.
-template
-void CompressedMatrix::CopyToMat(MatrixBase<float> *mat,
-                                 MatrixTransposeType trans) const;
-template
-void CompressedMatrix::CopyToMat(MatrixBase<double> *mat,
-                                 MatrixTransposeType trans) const;
-
-template<typename Real>
-void CompressedMatrix::CopyRowToVec(MatrixIndexT row,
-                                    VectorBase<Real> *v) const {
-  KALDI_ASSERT(row < this->NumRows());
-  KALDI_ASSERT(row >= 0);
-  KALDI_ASSERT(v->Dim() == this->NumCols());
-
-  GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
-  DataFormat format = static_cast<DataFormat>(h->format);
-  if (format == kOneByteWithColHeaders) {
-    PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
-    uint8 *byte_data = reinterpret_cast<uint8*>(per_col_header +
-                                                h->num_cols);
-    byte_data += row;  // point to first value we are interested in
-    for (int32 i = 0; i < h->num_cols;
-         i++, per_col_header++, byte_data += h->num_rows) {
-      float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
-          p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
-          p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
-          p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
-      float f = CharToFloat(p0, p25, p75, p100, *byte_data);
-      (*v)(i) = f;
-    }
-  } else if (format == kTwoByte) {
-    int32 num_cols = h->num_cols;
-    float min_value = h->min_value,
-        increment = h->range * (1.0 / 65535.0);
-    const uint16 *row_data = reinterpret_cast<uint16*>(h + 1) + (num_cols * row);
-    Real *v_data = v->Data();
-    for (int32 c = 0; c < num_cols; c++)
-      v_data[c] = min_value + row_data[c] * increment;
-  } else {
-    KALDI_ASSERT(format == kOneByte);
-    int32 num_cols = h->num_cols;
-    float min_value = h->min_value,
-        increment = h->range * (1.0 / 255.0);
-    const uint8 *row_data = reinterpret_cast<uint8*>(h + 1) + (num_cols * row);
-    Real *v_data = v->Data();
-    for (int32 c = 0; c < num_cols; c++)
-      v_data[c] = min_value + row_data[c] * increment;
-  }
-}
-
-template<typename Real>
-void CompressedMatrix::CopyColToVec(MatrixIndexT col,
-                                    VectorBase<Real> *v) const {
-  KALDI_ASSERT(col < this->NumCols());
-  KALDI_ASSERT(col >= 0);
-  KALDI_ASSERT(v->Dim() == this->NumRows());
-
-  GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
-
-  DataFormat format = static_cast<DataFormat>(h->format);
-  if (format == kOneByteWithColHeaders) {
-    PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
-    uint8 *byte_data = reinterpret_cast<uint8*>(per_col_header +
-                                                h->num_cols);
-    byte_data += col*h->num_rows;  // point to first value in the column we want
-    per_col_header += col;
-    float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
-        p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
-        p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
-        p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
-    for (int32 i = 0; i < h->num_rows; i++, byte_data++) {
-      float f = CharToFloat(p0, p25, p75, p100, *byte_data);
-      (*v)(i) = f;
-    }
-  } else if (format == kTwoByte) {
-    int32 num_rows = h->num_rows, num_cols = h->num_cols;
-    float min_value = h->min_value,
-        increment = h->range * (1.0 / 65535.0);
-    const uint16 *col_data = reinterpret_cast<uint16*>(h + 1) + col;
-    Real *v_data = v->Data();
-    for (int32 r = 0; r < num_rows; r++)
-      v_data[r] = min_value + increment * col_data[r * num_cols];
-  } else {
-    KALDI_ASSERT(format == kOneByte);
-    int32 num_rows = h->num_rows, num_cols = h->num_cols;
-    float min_value = h->min_value,
-        increment = h->range * (1.0 / 255.0);
-    const uint8 *col_data = reinterpret_cast<uint8*>(h + 1) + col;
-    Real *v_data = v->Data();
-    for (int32 r = 0; r < num_rows; r++)
-      v_data[r] = min_value + increment * col_data[r * num_cols];
-  }
-}
-
-// instantiate the templates.
-template void
-CompressedMatrix::CopyColToVec(MatrixIndexT, VectorBase<double> *) const;
-template void
-CompressedMatrix::CopyColToVec(MatrixIndexT, VectorBase<float> *) const;
-template void
-CompressedMatrix::CopyRowToVec(MatrixIndexT, VectorBase<double> *) const;
-template void
-CompressedMatrix::CopyRowToVec(MatrixIndexT, VectorBase<float> *) const;
-
-template<typename Real>
-void CompressedMatrix::CopyToMat(int32 row_offset,
-                                 int32 col_offset,
-                                 MatrixBase<Real> *dest) const {
-  KALDI_PARANOID_ASSERT(row_offset < this->NumRows());
-  KALDI_PARANOID_ASSERT(col_offset < this->NumCols());
-  KALDI_PARANOID_ASSERT(row_offset >= 0);
-  KALDI_PARANOID_ASSERT(col_offset >= 0);
-  KALDI_ASSERT(row_offset+dest->NumRows() <= this->NumRows());
-  KALDI_ASSERT(col_offset+dest->NumCols() <= this->NumCols());
-  // everything is OK
-  GlobalHeader *h = reinterpret_cast<GlobalHeader*>(data_);
-  int32 num_rows = h->num_rows, num_cols = h->num_cols,
-      tgt_cols = dest->NumCols(), tgt_rows = dest->NumRows();
-
-  DataFormat format = static_cast<DataFormat>(h->format);
-  if (format == kOneByteWithColHeaders) {
-    PerColHeader *per_col_header = reinterpret_cast<PerColHeader*>(h+1);
-    uint8 *byte_data = reinterpret_cast<uint8*>(per_col_header +
-                                                h->num_cols);
-
-    uint8 *start_of_subcol = byte_data+row_offset;  // skip appropriate
-    // number of columns
-    start_of_subcol += col_offset*num_rows;  // skip appropriate number of rows
-
-    per_col_header += col_offset;  // skip the appropriate number of headers
-
-    for (int32 i = 0;
-         i < tgt_cols;
-         i++, per_col_header++, start_of_subcol+=num_rows) {
-      byte_data = start_of_subcol;
-      float p0 = Uint16ToFloat(*h, per_col_header->percentile_0),
-          p25 = Uint16ToFloat(*h, per_col_header->percentile_25),
-          p75 = Uint16ToFloat(*h, per_col_header->percentile_75),
-          p100 = Uint16ToFloat(*h, per_col_header->percentile_100);
-      for (int32 j = 0; j < tgt_rows; j++, byte_data++) {
-        float f = CharToFloat(p0, p25, p75, p100, *byte_data);
-        (*dest)(j, i) = f;
-      }
-    }
-  } else if (format == kTwoByte) {
-    const uint16 *data = reinterpret_cast<const uint16*>(h+1) + col_offset +
-        (num_cols * row_offset);
-    float min_value = h->min_value,
-        increment = h->range * (1.0 / 65535.0);
-
-    for (int32 row = 0; row < tgt_rows; row++) {
-      Real *dest_row = dest->RowData(row);
-      for (int32 col = 0; col < tgt_cols; col++)
-        dest_row[col] = min_value + increment * data[col];
-      data += num_cols;
-    }
-  } else {
-    KALDI_ASSERT(format == kOneByte);
-    const uint8 *data = reinterpret_cast<const uint8*>(h+1) + col_offset +
-        (num_cols * row_offset);
-    float min_value = h->min_value,
-        increment = h->range * (1.0 / 255.0);
-    for (int32 row = 0; row < tgt_rows; row++) {
-      Real *dest_row = dest->RowData(row);
-      for (int32 col = 0; col < tgt_cols; col++)
-        dest_row[col] = min_value + increment * data[col];
-      data += num_cols;
-    }
-  }
-}
-
-// instantiate the templates.
-template void CompressedMatrix::CopyToMat(int32,
-                                          int32,
-                                          MatrixBase<float> *dest) const;
-template void CompressedMatrix::CopyToMat(int32,
-                                          int32,
-                                          MatrixBase<double> *dest) const;
-
-void CompressedMatrix::Clear() {
-  if (data_ != NULL) {
-    delete [] static_cast<float*>(data_);
-    data_ = NULL;
-  }
-}
-
-CompressedMatrix::CompressedMatrix(const CompressedMatrix &mat): data_(NULL) {
-  *this = mat; // use assignment operator.
-}
-
-CompressedMatrix &CompressedMatrix::operator = (const CompressedMatrix &mat) {
-  Clear(); // now this->data_ == NULL.
-  if (mat.data_ != NULL) {
-    MatrixIndexT data_size = DataSize(*static_cast<GlobalHeader*>(mat.data_));
-    data_ = AllocateData(data_size);
-    memcpy(static_cast<void*>(data_),
-           static_cast<void*>(mat.data_),
-           data_size);
-  }
-  return *this;
-}
-
-
-}  // namespace kaldi
--- a/speechx/speechx/kaldi/matrix/compressed-matrix.h
+++ b/speechx/speechx/kaldi/matrix/compressed-matrix.h
@ -1,283 +0,0 @@
-// matrix/compressed-matrix.h
-
-// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
-//                 Frantisek Skala, Wei Shi
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_MATRIX_COMPRESSED_MATRIX_H_
-#define KALDI_MATRIX_COMPRESSED_MATRIX_H_ 1
-
-#include "matrix/kaldi-matrix.h"
-
-namespace kaldi {
-
-/// \addtogroup matrix_group
-/// @{
-
-
-
-/*
-  The enum CompressionMethod is used when creating a CompressedMatrix (a lossily
-  compressed matrix) from a regular Matrix.  It dictates how we choose the
-  compressed format and how we choose the ranges of floats that are represented
-  by particular integers.
-
-    kAutomaticMethod = 1 This is the default when you don't specify the
-                        compression method.  It is a shorthand for using
-                        kSpeechFeature if the num-rows is more than 8, and
-                        kTwoByteAuto otherwise.
-    kSpeechFeature = 2  This is the most complicated of the compression methods,
-                        and was designed for speech features which have a roughly
-                        Gaussian distribution with different ranges for each
-                        dimension.  Each element is stored in one byte, but there
-                        is an 8-byte header per column; the spacing of the
-                        integer values is not uniform but is in 3 ranges.
-    kTwoByteAuto = 3    Each element is stored in two bytes as a uint16, with
-                        the representable range of values chosen automatically
-                        with the minimum and maximum elements of the matrix as
-                        its edges.
-    kTwoByteSignedInteger = 4
-                        Each element is stored in two bytes as a uint16, with
-                        the representable range of values chosen to coincide with
-                        what you'd get if you stored signed integers, i.e.
-                        [-32768.0, 32767.0].  Suitable for waveform data that
-                        was previously stored as 16-bit PCM.
-    kOneByteAuto = 5    Each element is stored in one byte as a uint8, with the
-                        representable range of values chosen automatically with
-                        the minimum and maximum elements of the matrix as its
-                        edges.
-    kOneByteUnsignedInteger = 6 Each element is stored in
-                        one byte as a uint8, with the representable range of
-                        values equal to [0.0, 255.0].
-    kOneByteZeroOne = 7 Each element is stored in
-                        one byte as a uint8, with the representable range of
-                        values equal to [0.0, 1.0].  Suitable for image data
-                        that has previously been compressed as int8.
-
-    // We can add new methods here as needed: if they just imply different ways
-    // of selecting the min_value and range, and a num-bytes = 1 or 2, they will
-    // be trivial to implement.
-*/
-enum CompressionMethod {
-  kAutomaticMethod = 1,
-  kSpeechFeature = 2,
-  kTwoByteAuto = 3,
-  kTwoByteSignedInteger = 4,
-  kOneByteAuto = 5,
-  kOneByteUnsignedInteger = 6,
-  kOneByteZeroOne = 7
-};
-
-
-/*
-  This class does lossy compression of a matrix.  It supports various compression
-  methods, see enum CompressionMethod.
-*/
-
-class CompressedMatrix {
- public:
-  CompressedMatrix(): data_(NULL) { }
-
-  ~CompressedMatrix() { Clear(); }
-
-  template<typename Real>
-  explicit CompressedMatrix(const MatrixBase<Real> &mat,
-                            CompressionMethod method = kAutomaticMethod):
-      data_(NULL) { CopyFromMat(mat, method); }
-
-  /// Initializer that can be used to select part of an existing
-  /// CompressedMatrix without un-compressing and re-compressing (note: unlike
-  /// similar initializers for class Matrix, it doesn't point to the same memory
-  /// location).
-  ///
-  /// This creates a CompressedMatrix with the size (num_rows, num_cols)
-  /// starting at (row_offset, col_offset).
-  ///
-  /// If you specify allow_padding = true,
-  /// it is permitted to have row_offset < 0 and
-  /// row_offset + num_rows > mat.NumRows(), and the result will contain
-  /// repeats of the first and last rows of 'mat' as necessary.
-  CompressedMatrix(const CompressedMatrix &mat,
-                   const MatrixIndexT row_offset,
-                   const MatrixIndexT num_rows,
-                   const MatrixIndexT col_offset,
-                   const MatrixIndexT num_cols,
-                   bool allow_padding = false);
-
-  void *Data() const { return this->data_; }
-
-  /// This will resize *this and copy the contents of mat to *this.
-  template<typename Real>
-  void CopyFromMat(const MatrixBase<Real> &mat,
-                   CompressionMethod method = kAutomaticMethod);
-
-  CompressedMatrix(const CompressedMatrix &mat);
-
-  CompressedMatrix &operator = (const CompressedMatrix &mat); // assignment operator.
-
-  template<typename Real>
-  CompressedMatrix &operator = (const MatrixBase<Real> &mat); // assignment operator.
-
-  /// Copies contents to matrix.  Note: mat must have the correct size.
-  /// The kTrans case uses a temporary.
-  template<typename Real>
-  void CopyToMat(MatrixBase<Real> *mat,
-                 MatrixTransposeType trans = kNoTrans) const;
-
-  void Write(std::ostream &os, bool binary) const;
-
-  void Read(std::istream &is, bool binary);
-
-  /// Returns number of rows (or zero for empty matrix).
-  inline MatrixIndexT NumRows() const { return (data_ == NULL) ? 0 :
-      (*reinterpret_cast<GlobalHeader*>(data_)).num_rows; }
-
-  /// Returns number of columns (or zero for empty matrix).
-  inline MatrixIndexT NumCols() const { return (data_ == NULL) ? 0 :
-      (*reinterpret_cast<GlobalHeader*>(data_)).num_cols; }
-
-  /// Copies row #row of the matrix into vector v.
-  /// Note: v must have same size as #cols.
-  template<typename Real>
-  void CopyRowToVec(MatrixIndexT row, VectorBase<Real> *v) const;
-
-  /// Copies column #col of the matrix into vector v.
-  /// Note: v must have same size as #rows.
-  template<typename Real>
-  void CopyColToVec(MatrixIndexT col, VectorBase<Real> *v) const;
-
-  /// Copies submatrix of compressed matrix into matrix dest.
-  /// Submatrix starts at row row_offset and column column_offset and its size
-  /// is defined by size of provided matrix dest
-  template<typename Real>
-  void CopyToMat(int32 row_offset,
-                 int32 column_offset,
-                 MatrixBase<Real> *dest) const;
-
-  void Swap(CompressedMatrix *other) { std::swap(data_, other->data_); }
-
-  void Clear();
-
-  /// scales all elements of matrix by alpha.
-  /// It scales the floating point values in GlobalHeader by alpha.
-  void Scale(float alpha);
-
-  friend class Matrix<float>;
-  friend class Matrix<double>;
- private:
-
-  // This enum describes the different compressed-data formats: these are
-  // distinct from the compression methods although all of the methods apart
-  // from kAutomaticMethod dictate a particular compressed-data format.
-  //
-  //  kOneByteWithColHeaders means there is a GlobalHeader and each
-  //    column has a PerColHeader; the actual data is stored in
-  //    one byte per element, in column-major order (the mapping
-  //    from integers to floats is a little complicated).
-  //  kTwoByte means there is a global header but no PerColHeader;
-  //    the actual data is stored in two bytes per element in
-  //    row-major order; it's decompressed as:
-  //       uint16 i;  GlobalHeader g;
-  //       float f = g.min_value + i * (g.range / 65535.0)
-  //  kOneByte means there is a global header but no PerColHeader;
-  //    the data is stored in one byte per element in row-major
-  //    order and is decompressed as:
-  //       uint8 i;  GlobalHeader g;
-  //       float f = g.min_value + i * (g.range / 255.0)
-  enum DataFormat {
-    kOneByteWithColHeaders = 1,
-    kTwoByte = 2,
-    kOneByte = 3
-  };
-
-
-  // allocates data using new [], ensures byte alignment
-  // sufficient for float.
-  static void *AllocateData(int32 num_bytes);
-
-  struct GlobalHeader {
-    int32 format;     // Represents the enum DataFormat.
-    float min_value;  // min_value and range represent the ranges of the integer
-                      // data in the kTwoByte and kOneByte formats, and the
-                      // range of the PerColHeader uint16's in the
-                      // kOneByteWithColHeaders format.
-    float range;
-    int32 num_rows;
-    int32 num_cols;
-  };
-
-  // This function computes the global header for compressing this data.
-  template<typename Real>
-  static inline void ComputeGlobalHeader(const MatrixBase<Real> &mat,
-                                         CompressionMethod method,
-                                         GlobalHeader *header);
-
-
-  // The number of bytes we need to request when allocating 'data_'.
-  static MatrixIndexT DataSize(const GlobalHeader &header);
-
-  // This struct is only used in format kOneByteWithColHeaders.
-  struct PerColHeader {
-    uint16 percentile_0;
-    uint16 percentile_25;
-    uint16 percentile_75;
-    uint16 percentile_100;
-  };
-
-  template<typename Real>
-  static void CompressColumn(const GlobalHeader &global_header,
-                             const Real *data, MatrixIndexT stride,
-                             int32 num_rows, PerColHeader *header,
-                             uint8 *byte_data);
-  template<typename Real>
-  static void ComputeColHeader(const GlobalHeader &global_header,
-                               const Real *data, MatrixIndexT stride,
-                               int32 num_rows, PerColHeader *header);
-
-  static inline uint16 FloatToUint16(const GlobalHeader &global_header,
-                                     float value);
-
-  // this is used only in the kOneByte compression format.
-  static inline uint8 FloatToUint8(const GlobalHeader &global_header,
-                                   float value);
-
-  static inline float Uint16ToFloat(const GlobalHeader &global_header,
-                                    uint16 value);
-
-  // this is used only in the kOneByteWithColHeaders compression format.
-  static inline uint8 FloatToChar(float p0, float p25,
-                                          float p75, float p100,
-                                          float value);
-
-  // this is used only in the kOneByteWithColHeaders compression format.
-  static inline float CharToFloat(float p0, float p25,
-                                  float p75, float p100,
-                                  uint8 value);
-
-  void *data_; // first GlobalHeader, then PerColHeader (repeated), then
-  // the byte data for each column (repeated).  Note: don't intersperse
-  // the byte data with the PerColHeaders, because of alignment issues.
-
-};
-
-/// @} end of \addtogroup matrix_group
-
-
-}  // namespace kaldi
-
-
-#endif  // KALDI_MATRIX_COMPRESSED_MATRIX_H_
--- a/speechx/speechx/kaldi/matrix/jama-eig.h
+++ b/speechx/speechx/kaldi/matrix/jama-eig.h
@ -1,924 +0,0 @@
-// matrix/jama-eig.h
-
-// Copyright 2009-2011 Microsoft Corporation 
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-//  http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-// This file consists of a port and modification of materials from
-//   JAMA: A Java Matrix Package
-// under the following notice: This software is a cooperative product of
-// The MathWorks and the National Institute of Standards and Technology (NIST)
-// which has been released to the public.  This notice and the original code are
-// available at http://math.nist.gov/javanumerics/jama/domain.notice
-
-
-
-#ifndef KALDI_MATRIX_JAMA_EIG_H_
-#define KALDI_MATRIX_JAMA_EIG_H_ 1
-
-#include "matrix/kaldi-matrix.h"
-
-namespace kaldi {
-
-// This class is not to be used externally.  See the Eig function in the Matrix
-// class in kaldi-matrix.h.  This is the external interface.
-
-template<typename Real> class EigenvalueDecomposition {
-  // This class is based on the EigenvalueDecomposition class from the JAMA
-  // library (version 1.0.2).
- public:
-  EigenvalueDecomposition(const MatrixBase<Real> &A);
-
-  ~EigenvalueDecomposition();  // free memory.
-
-  void GetV(MatrixBase<Real> *V_out) {  // V is what we call P externally; it's the matrix of
-    // eigenvectors.
-    KALDI_ASSERT(V_out->NumRows() == static_cast<MatrixIndexT>(n_)
-                 && V_out->NumCols() == static_cast<MatrixIndexT>(n_));
-    for (int i = 0; i < n_; i++)
-      for (int j = 0; j < n_; j++)
-        (*V_out)(i, j) = V(i, j);  // V(i, j) is member function.
-  }
-  void GetRealEigenvalues(VectorBase<Real> *r_out) {
-    // returns real part of eigenvalues.
-    KALDI_ASSERT(r_out->Dim() == static_cast<MatrixIndexT>(n_));
-    for (int i = 0; i < n_; i++)
-      (*r_out)(i) = d_[i];
-  }
-  void GetImagEigenvalues(VectorBase<Real> *i_out) {
-    // returns imaginary part of eigenvalues.
-    KALDI_ASSERT(i_out->Dim() == static_cast<MatrixIndexT>(n_));
-    for (int i = 0; i < n_; i++)
-      (*i_out)(i) = e_[i];
-  }
- private:
-
-  inline Real &H(int r, int c) { return H_[r*n_ + c]; }
-  inline Real &V(int r, int c) { return V_[r*n_ + c]; }
-
-  // complex division
-  inline static void cdiv(Real xr, Real xi, Real yr, Real yi, Real *cdivr, Real *cdivi) {
-    Real r, d;
-    if (std::abs(yr) > std::abs(yi)) {
-      r = yi/yr;
-      d = yr + r*yi;
-      *cdivr = (xr + r*xi)/d;
-      *cdivi = (xi - r*xr)/d;
-    } else {
-      r = yr/yi;
-      d = yi + r*yr;
-      *cdivr = (r*xr + xi)/d;
-      *cdivi = (r*xi - xr)/d;
-    }
-  }
-
-  // Nonsymmetric reduction from Hessenberg to real Schur form.
-  void Hqr2 ();
-
-
-  int n_;  // matrix dimension.
-
-  Real *d_, *e_;  // real and imaginary parts of eigenvalues.
-  Real *V_;  // the eigenvectors (P in our external notation)
-  Real *H_;  // the nonsymmetric Hessenberg form.
-  Real *ort_;  // working storage for nonsymmetric algorithm.
-
-  // Symmetric Householder reduction to tridiagonal form.
-  void Tred2 ();
-
-  // Symmetric tridiagonal QL algorithm.
-  void Tql2 ();
-
-  // Nonsymmetric reduction to Hessenberg form.
-  void Orthes ();
-
-};
-
-template class EigenvalueDecomposition<float>;  // force instantiation.
-template class EigenvalueDecomposition<double>;  // force instantiation.
-
-template<typename Real> void  EigenvalueDecomposition<Real>::Tred2() {
-  //  This is derived from the Algol procedures tred2 by
-  //  Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
-  //  Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
-  //  Fortran subroutine in EISPACK.
-
-  for (int j = 0; j < n_; j++) {
-    d_[j] = V(n_-1, j);
-  }
-
-  // Householder reduction to tridiagonal form.
-
-  for (int i = n_-1; i > 0; i--) {
-
-    // Scale to avoid under/overflow.
-
-    Real scale = 0.0;
-    Real h = 0.0;
-    for (int k = 0; k < i; k++) {
-      scale = scale + std::abs(d_[k]);
-    }
-    if (scale == 0.0) {
-      e_[i] = d_[i-1];
-      for (int j = 0; j < i; j++) {
-        d_[j] = V(i-1, j);
-        V(i, j) = 0.0;
-        V(j, i) = 0.0;
-      }
-    } else {
-
-      // Generate Householder vector.
-
-      for (int k = 0; k < i; k++) {
-        d_[k] /= scale;
-        h += d_[k] * d_[k];
-      }
-      Real f = d_[i-1];
-      Real g = std::sqrt(h);
-      if (f > 0) {
-        g = -g;
-      }
-      e_[i] = scale * g;
-      h = h - f * g;
-      d_[i-1] = f - g;
-      for (int j = 0; j < i; j++) {
-        e_[j] = 0.0;
-      }
-
-      // Apply similarity transformation to remaining columns.
-
-      for (int j = 0; j < i; j++) {
-        f = d_[j];
-        V(j, i) = f;
-        g =e_[j] + V(j, j) * f;
-        for (int k = j+1; k <= i-1; k++) {
-          g += V(k, j) * d_[k];
-          e_[k] += V(k, j) * f;
-        }
-        e_[j] = g;
-      }
-      f = 0.0;
-      for (int j = 0; j < i; j++) {
-        e_[j] /= h;
-        f += e_[j] * d_[j];
-      }
-      Real hh = f / (h + h);
-      for (int j = 0; j < i; j++) {
-        e_[j] -= hh * d_[j];
-      }
-      for (int j = 0; j < i; j++) {
-        f = d_[j];
-        g = e_[j];
-        for (int k = j; k <= i-1; k++) {
-          V(k, j) -= (f * e_[k] + g * d_[k]);
-        }
-        d_[j] = V(i-1, j);
-        V(i, j) = 0.0;
-      }
-    }
-    d_[i] = h;
-  }
-
-  // Accumulate transformations.
-
-  for (int i = 0; i < n_-1; i++) {
-    V(n_-1, i) = V(i, i);
-    V(i, i) = 1.0;
-    Real h = d_[i+1];
-    if (h != 0.0) {
-      for (int k = 0; k <= i; k++) {
-        d_[k] = V(k, i+1) / h;
-      }
-      for (int j = 0; j <= i; j++) {
-        Real g = 0.0;
-        for (int k = 0; k <= i; k++) {
-          g += V(k, i+1) * V(k, j);
-        }
-        for (int k = 0; k <= i; k++) {
-          V(k, j) -= g * d_[k];
-        }
-      }
-    }
-    for (int k = 0; k <= i; k++) {
-      V(k, i+1) = 0.0;
-    }
-  }
-  for (int j = 0; j < n_; j++) {
-    d_[j] = V(n_-1, j);
-    V(n_-1, j) = 0.0;
-  }
-  V(n_-1, n_-1) = 1.0;
-   e_[0] = 0.0;
-}
-
-template<typename Real> void EigenvalueDecomposition<Real>::Tql2() {
-  //  This is derived from the Algol procedures tql2, by
-  //  Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
-  //  Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
-  //  Fortran subroutine in EISPACK.
-
-  for (int i = 1; i < n_; i++) {
-     e_[i-1] = e_[i];
-  }
-   e_[n_-1] = 0.0;
-
-  Real f = 0.0;
-  Real tst1 = 0.0;
-  Real eps = std::numeric_limits<Real>::epsilon();
-  for (int l = 0; l < n_; l++) {
-
-    // Find small subdiagonal element
-
-    tst1 = std::max(tst1, std::abs(d_[l]) + std::abs(e_[l]));
-    int m = l;
-    while (m < n_) {
-      if (std::abs(e_[m]) <= eps*tst1) {
-        break;
-      }
-      m++;
-    }
-
-    // If m == l, d_[l] is an eigenvalue,
-    // otherwise, iterate.
-
-    if (m > l) {
-      int iter = 0;
-      do {
-        iter = iter + 1;  // (Could check iteration count here.)
-
-        // Compute implicit shift
-
-        Real g = d_[l];
-        Real p = (d_[l+1] - g) / (2.0 *e_[l]);
-        Real r = Hypot(p, static_cast<Real>(1.0));  // This is a Kaldi version of hypot that works with templates.
-        if (p < 0) {
-          r = -r;
-        }
-        d_[l] =e_[l] / (p + r);
-        d_[l+1] =e_[l] * (p + r);
-        Real dl1 = d_[l+1];
-        Real h = g - d_[l];
-        for (int i = l+2; i < n_; i++) {
-          d_[i] -= h;
-        }
-        f = f + h;
-
-        // Implicit QL transformation.
-
-        p = d_[m];
-        Real c = 1.0;
-        Real c2 = c;
-        Real c3 = c;
-        Real el1 =e_[l+1];
-        Real s = 0.0;
-        Real s2 = 0.0;
-        for (int i = m-1; i >= l; i--) {
-          c3 = c2;
-          c2 = c;
-          s2 = s;
-          g = c *e_[i];
-          h = c * p;
-          r = Hypot(p, e_[i]);  // This is a Kaldi version of Hypot that works with templates.
-          e_[i+1] = s * r;
-          s =e_[i] / r;
-          c = p / r;
-          p = c * d_[i] - s * g;
-          d_[i+1] = h + s * (c * g + s * d_[i]);
-
-          // Accumulate transformation.
-
-          for (int k = 0; k < n_; k++) {
-            h = V(k, i+1);
-            V(k, i+1) = s * V(k, i) + c * h;
-            V(k, i) = c * V(k, i) - s * h;
-          }
-        }
-        p = -s * s2 * c3 * el1 *e_[l] / dl1;
-        e_[l] = s * p;
-        d_[l] = c * p;
-
-        // Check for convergence.
-
-      } while (std::abs(e_[l]) > eps*tst1);
-    }
-    d_[l] = d_[l] + f;
-    e_[l] = 0.0;
-  }
-
-  // Sort eigenvalues and corresponding vectors.
-
-  for (int i = 0; i < n_-1; i++) {
-    int k = i;
-    Real p = d_[i];
-    for (int j = i+1; j < n_; j++) {
-      if (d_[j] < p) {
-        k = j;
-        p = d_[j];
-      }
-    }
-    if (k != i) {
-      d_[k] = d_[i];
-      d_[i] = p;
-      for (int j = 0; j < n_; j++) {
-        p = V(j, i);
-        V(j, i) = V(j, k);
-        V(j, k) = p;
-      }
-    }
-  }
-}
-
-template<typename Real>
-void EigenvalueDecomposition<Real>::Orthes() {
-
-  //  This is derived from the Algol procedures orthes and ortran,
-  //  by Martin and Wilkinson, Handbook for Auto. Comp.,
-  //  Vol.ii-Linear Algebra, and the corresponding
-  //  Fortran subroutines in EISPACK.
-
-  int low = 0;
-  int high = n_-1;
-
-  for (int m = low+1; m <= high-1; m++) {
-
-    // Scale column.
-
-    Real scale = 0.0;
-    for (int i = m; i <= high; i++) {
-      scale = scale + std::abs(H(i, m-1));
-    }
-    if (scale != 0.0) {
-
-      // Compute Householder transformation.
-
-      Real h = 0.0;
-      for (int i = high; i >= m; i--) {
-        ort_[i] = H(i, m-1)/scale;
-        h += ort_[i] * ort_[i];
-      }
-      Real g = std::sqrt(h);
-      if (ort_[m] > 0) {
-        g = -g;
-      }
-      h = h - ort_[m] * g;
-      ort_[m] = ort_[m] - g;
-
-      // Apply Householder similarity transformation
-      // H = (I-u*u'/h)*H*(I-u*u'/h)
-
-      for (int j = m; j < n_; j++) {
-        Real f = 0.0;
-        for (int i = high; i >= m; i--) {
-          f += ort_[i]*H(i, j);
-        }
-        f = f/h;
-        for (int i = m; i <= high; i++) {
-          H(i, j) -= f*ort_[i];
-        }
-      }
-
-      for (int i = 0; i <= high; i++) {
-        Real f = 0.0;
-        for (int j = high; j >= m; j--) {
-          f += ort_[j]*H(i, j);
-        }
-        f = f/h;
-        for (int j = m; j <= high; j++) {
-          H(i, j) -= f*ort_[j];
-        }
-      }
-      ort_[m] = scale*ort_[m];
-      H(m, m-1) = scale*g;
-    }
-  }
-
-  // Accumulate transformations (Algol's ortran).
-
-  for (int i = 0; i < n_; i++) {
-    for (int j = 0; j < n_; j++) {
-      V(i, j) = (i == j ? 1.0 : 0.0);
-    }
-  }
-
-  for (int m = high-1; m >= low+1; m--) {
-    if (H(m, m-1) != 0.0) {
-      for (int i = m+1; i <= high; i++) {
-        ort_[i] = H(i, m-1);
-      }
-      for (int j = m; j <= high; j++) {
-        Real g = 0.0;
-        for (int i = m; i <= high; i++) {
-          g += ort_[i] * V(i, j);
-        }
-        // Double division avoids possible underflow
-        g = (g / ort_[m]) / H(m, m-1);
-        for (int i = m; i <= high; i++) {
-          V(i, j) += g * ort_[i];
-        }
-      }
-    }
-  }
-}
-
-template<typename Real> void  EigenvalueDecomposition<Real>::Hqr2() {
-  //  This is derived from the Algol procedure hqr2,
-  //  by Martin and Wilkinson, Handbook for Auto. Comp.,
-  //  Vol.ii-Linear Algebra, and the corresponding
-  //  Fortran subroutine in EISPACK.
-
-  int nn = n_;
-  int n = nn-1;
-  int low = 0;
-  int high = nn-1;
-  Real eps = std::numeric_limits<Real>::epsilon();
-  Real exshift = 0.0;
-  Real p = 0, q = 0, r = 0, s = 0, z=0, t, w, x, y;
-
-  // Store roots isolated by balanc and compute matrix norm
-
-  Real norm = 0.0;
-  for (int i = 0; i < nn; i++) {
-    if (i < low || i > high) {
-      d_[i] = H(i, i);
-      e_[i] = 0.0;
-    }
-    for (int j = std::max(i-1, 0); j < nn; j++) {
-      norm = norm + std::abs(H(i, j));
-    }
-  }
-
-  // Outer loop over eigenvalue index
-
-  int iter = 0;
-  while (n >= low) {
-
-    // Look for single small sub-diagonal element
-
-    int l = n;
-    while (l > low) {
-      s = std::abs(H(l-1, l-1)) + std::abs(H(l, l));
-      if (s == 0.0) {
-        s = norm;
-      }
-      if (std::abs(H(l, l-1)) < eps * s) {
-        break;
-      }
-      l--;
-    }
-
-    // Check for convergence
-    // One root found
-
-    if (l == n) {
-      H(n, n) = H(n, n) + exshift;
-      d_[n] = H(n, n);
-      e_[n] = 0.0;
-      n--;
-      iter = 0;
-
-      // Two roots found
-
-    } else if (l == n-1) {
-      w = H(n, n-1) * H(n-1, n);
-      p = (H(n-1, n-1) - H(n, n)) / 2.0;
-      q = p * p + w;
-      z = std::sqrt(std::abs(q));
-      H(n, n) = H(n, n) + exshift;
-      H(n-1, n-1) = H(n-1, n-1) + exshift;
-      x = H(n, n);
-
-      // Real pair
-
-      if (q >= 0) {
-        if (p >= 0) {
-          z = p + z;
-        } else {
-          z = p - z;
-        }
-        d_[n-1] = x + z;
-        d_[n] = d_[n-1];
-        if (z != 0.0) {
-          d_[n] = x - w / z;
-        }
-        e_[n-1] = 0.0;
-        e_[n] = 0.0;
-        x = H(n, n-1);
-        s = std::abs(x) + std::abs(z);
-        p = x / s;
-        q = z / s;
-        r = std::sqrt(p * p+q * q);
-        p = p / r;
-        q = q / r;
-
-        // Row modification
-
-        for (int j = n-1; j < nn; j++) {
-          z = H(n-1, j);
-          H(n-1, j) = q * z + p * H(n, j);
-          H(n, j) = q * H(n, j) - p * z;
-        }
-
-        // Column modification
-
-        for (int i = 0; i <= n; i++) {
-          z = H(i, n-1);
-          H(i, n-1) = q * z + p * H(i, n);
-          H(i, n) = q * H(i, n) - p * z;
-        }
-
-        // Accumulate transformations
-
-        for (int i = low; i <= high; i++) {
-          z = V(i, n-1);
-          V(i, n-1) = q * z + p * V(i, n);
-          V(i, n) = q * V(i, n) - p * z;
-        }
-
-        // Complex pair
-
-      } else {
-        d_[n-1] = x + p;
-        d_[n] = x + p;
-        e_[n-1] = z;
-        e_[n] = -z;
-      }
-      n = n - 2;
-      iter = 0;
-
-      // No convergence yet
-
-    } else {
-
-      // Form shift
-
-      x = H(n, n);
-      y = 0.0;
-      w = 0.0;
-      if (l < n) {
-        y = H(n-1, n-1);
-        w = H(n, n-1) * H(n-1, n);
-      }
-
-      // Wilkinson's original ad hoc shift
-
-      if (iter == 10) {
-        exshift += x;
-        for (int i = low; i <= n; i++) {
-          H(i, i) -= x;
-        }
-        s = std::abs(H(n, n-1)) + std::abs(H(n-1, n-2));
-        x = y = 0.75 * s;
-        w = -0.4375 * s * s;
-      }
-
-      // MATLAB's new ad hoc shift
-
-      if (iter == 30) {
-        s = (y - x) / 2.0;
-        s = s * s + w;
-        if (s > 0) {
-          s = std::sqrt(s);
-          if (y < x) {
-            s = -s;
-          }
-          s = x - w / ((y - x) / 2.0 + s);
-          for (int i = low; i <= n; i++) {
-            H(i, i) -= s;
-          }
-          exshift += s;
-          x = y = w = 0.964;
-        }
-      }
-
-      iter = iter + 1;   // (Could check iteration count here.)
-
-      // Look for two consecutive small sub-diagonal elements
-
-      int m = n-2;
-      while (m >= l) {
-        z = H(m, m);
-        r = x - z;
-        s = y - z;
-        p = (r * s - w) / H(m+1, m) + H(m, m+1);
-        q = H(m+1, m+1) - z - r - s;
-        r = H(m+2, m+1);
-        s = std::abs(p) + std::abs(q) + std::abs(r);
-        p = p / s;
-        q = q / s;
-        r = r / s;
-        if (m == l) {
-          break;
-        }
-        if (std::abs(H(m, m-1)) * (std::abs(q) + std::abs(r)) <
-            eps * (std::abs(p) * (std::abs(H(m-1, m-1)) + std::abs(z) +
-                                  std::abs(H(m+1, m+1))))) {
-          break;
-        }
-        m--;
-      }
-
-      for (int i = m+2; i <= n; i++) {
-        H(i, i-2) = 0.0;
-        if (i > m+2) {
-          H(i, i-3) = 0.0;
-        }
-      }
-
-      // Double QR step involving rows l:n and columns m:n
-
-      for (int k = m; k <= n-1; k++) {
-        bool notlast = (k != n-1);
-        if (k != m) {
-          p = H(k, k-1);
-          q = H(k+1, k-1);
-          r = (notlast ? H(k+2, k-1) : 0.0);
-          x = std::abs(p) + std::abs(q) + std::abs(r);
-          if (x != 0.0) {
-            p = p / x;
-            q = q / x;
-            r = r / x;
-          }
-        }
-        if (x == 0.0) {
-          break;
-        }
-        s = std::sqrt(p * p + q * q + r * r);
-        if (p < 0) {
-          s = -s;
-        }
-        if (s != 0) {
-          if (k != m) {
-            H(k, k-1) = -s * x;
-          } else if (l != m) {
-            H(k, k-1) = -H(k, k-1);
-          }
-          p = p + s;
-          x = p / s;
-          y = q / s;
-          z = r / s;
-          q = q / p;
-          r = r / p;
-
-          // Row modification
-
-          for (int j = k; j < nn; j++) {
-            p = H(k, j) + q * H(k+1, j);
-            if (notlast) {
-              p = p + r * H(k+2, j);
-              H(k+2, j) = H(k+2, j) - p * z;
-            }
-            H(k, j) = H(k, j) - p * x;
-            H(k+1, j) = H(k+1, j) - p * y;
-          }
-
-          // Column modification
-
-          for (int i = 0; i <= std::min(n, k+3); i++) {
-            p = x * H(i, k) + y * H(i, k+1);
-            if (notlast) {
-              p = p + z * H(i, k+2);
-              H(i, k+2) = H(i, k+2) - p * r;
-            }
-            H(i, k) = H(i, k) - p;
-            H(i, k+1) = H(i, k+1) - p * q;
-          }
-
-          // Accumulate transformations
-
-          for (int i = low; i <= high; i++) {
-            p = x * V(i, k) + y * V(i, k+1);
-            if (notlast) {
-              p = p + z * V(i, k+2);
-              V(i, k+2) = V(i, k+2) - p * r;
-            }
-            V(i, k) = V(i, k) - p;
-            V(i, k+1) = V(i, k+1) - p * q;
-          }
-        }  // (s != 0)
-      }  // k loop
-    }  // check convergence
-  }  // while (n >= low)
-
-  // Backsubstitute to find vectors of upper triangular form
-
-  if (norm == 0.0) {
-    return;
-  }
-
-  for (n = nn-1; n >= 0; n--) {
-    p = d_[n];
-    q = e_[n];
-
-    // Real vector
-
-    if (q == 0) {
-      int l = n;
-      H(n, n) = 1.0;
-      for (int i = n-1; i >= 0; i--) {
-        w = H(i, i) - p;
-        r = 0.0;
-        for (int j = l; j <= n; j++) {
-          r = r + H(i, j) * H(j, n);
-        }
-        if (e_[i] < 0.0) {
-          z = w;
-          s = r;
-        } else {
-          l = i;
-          if (e_[i] == 0.0) {
-            if (w != 0.0) {
-              H(i, n) = -r / w;
-            } else {
-              H(i, n) = -r / (eps * norm);
-            }
-
-            // Solve real equations
-
-          } else {
-            x = H(i, i+1);
-            y = H(i+1, i);
-            q = (d_[i] - p) * (d_[i] - p) +e_[i] *e_[i];
-            t = (x * s - z * r) / q;
-            H(i, n) = t;
-            if (std::abs(x) > std::abs(z)) {
-              H(i+1, n) = (-r - w * t) / x;
-            } else {
-              H(i+1, n) = (-s - y * t) / z;
-            }
-          }
-
-          // Overflow control
-
-          t = std::abs(H(i, n));
-          if ((eps * t) * t > 1) {
-            for (int j = i; j <= n; j++) {
-              H(j, n) = H(j, n) / t;
-            }
-          }
-        }
-      }
-
-      // Complex vector
-
-    } else if (q < 0) {
-      int l = n-1;
-
-      // Last vector component imaginary so matrix is triangular
-
-      if (std::abs(H(n, n-1)) > std::abs(H(n-1, n))) {
-        H(n-1, n-1) = q / H(n, n-1);
-        H(n-1, n) = -(H(n, n) - p) / H(n, n-1);
-      } else {
-        Real cdivr, cdivi;
-        cdiv(0.0, -H(n-1, n), H(n-1, n-1)-p, q, &cdivr, &cdivi);
-        H(n-1, n-1) = cdivr;
-        H(n-1, n) = cdivi;
-      }
-      H(n, n-1) = 0.0;
-      H(n, n) = 1.0;
-      for (int i = n-2; i >= 0; i--) {
-        Real ra, sa, vr, vi;
-        ra = 0.0;
-        sa = 0.0;
-        for (int j = l; j <= n; j++) {
-          ra = ra + H(i, j) * H(j, n-1);
-          sa = sa + H(i, j) * H(j, n);
-        }
-        w = H(i, i) - p;
-
-        if (e_[i] < 0.0) {
-          z = w;
-          r = ra;
-          s = sa;
-        } else {
-          l = i;
-          if (e_[i] == 0) {
-            Real cdivr, cdivi;
-            cdiv(-ra, -sa, w, q, &cdivr, &cdivi);
-            H(i, n-1) = cdivr;
-            H(i, n) = cdivi;
-          } else {
-            Real cdivr, cdivi;
-            // Solve complex equations
-
-            x = H(i, i+1);
-            y = H(i+1, i);
-            vr = (d_[i] - p) * (d_[i] - p) +e_[i] *e_[i] - q * q;
-            vi = (d_[i] - p) * 2.0 * q;
-            if (vr == 0.0 && vi == 0.0) {
-              vr = eps * norm * (std::abs(w) + std::abs(q) +
-                                 std::abs(x) + std::abs(y) + std::abs(z));
-            }
-            cdiv(x*r-z*ra+q*sa, x*s-z*sa-q*ra, vr, vi, &cdivr, &cdivi);
-            H(i, n-1) = cdivr;
-            H(i, n) = cdivi;
-            if (std::abs(x) > (std::abs(z) + std::abs(q))) {
-              H(i+1, n-1) = (-ra - w * H(i, n-1) + q * H(i, n)) / x;
-              H(i+1, n) = (-sa - w * H(i, n) - q * H(i, n-1)) / x;
-            } else {
-              cdiv(-r-y*H(i, n-1), -s-y*H(i, n), z, q, &cdivr, &cdivi);
-              H(i+1, n-1) = cdivr;
-              H(i+1, n) = cdivi;
-            }
-          }
-
-          // Overflow control
-
-          t = std::max(std::abs(H(i, n-1)), std::abs(H(i, n)));
-          if ((eps * t) * t > 1) {
-            for (int j = i; j <= n; j++) {
-              H(j, n-1) = H(j, n-1) / t;
-              H(j, n) = H(j, n) / t;
-            }
-          }
-        }
-      }
-    }
-  }
-
-  // Vectors of isolated roots
-
-  for (int i = 0; i < nn; i++) {
-    if (i < low || i > high) {
-      for (int j = i; j < nn; j++) {
-        V(i, j) = H(i, j);
-      }
-    }
-  }
-
-  // Back transformation to get eigenvectors of original matrix
-
-  for (int j = nn-1; j >= low; j--) {
-    for (int i = low; i <= high; i++) {
-      z = 0.0;
-      for (int k = low; k <= std::min(j, high); k++) {
-        z = z + V(i, k) * H(k, j);
-      }
-      V(i, j) = z;
-    }
-  }
-}
-
-template<typename Real>
-EigenvalueDecomposition<Real>::EigenvalueDecomposition(const MatrixBase<Real> &A) {
-  KALDI_ASSERT(A.NumCols() == A.NumRows() && A.NumCols() >= 1);
-  n_ = A.NumRows();
-  V_ = new Real[n_*n_];
-  d_ = new Real[n_];
-  e_ = new Real[n_];
-  H_ = NULL;
-  ort_ = NULL;
-  if (A.IsSymmetric(0.0)) {
-
-    for (int i = 0; i < n_; i++)
-      for (int j = 0; j < n_; j++)
-        V(i, j) = A(i, j);  // Note that V(i, j) is a member function; A(i, j) is an operator
-    // of the matrix A.
-    // Tridiagonalize.
-    Tred2();
-
-    // Diagonalize.
-    Tql2();
-  } else {
-    H_ = new Real[n_*n_];
-    ort_ = new Real[n_];
-    for (int i = 0; i < n_; i++)
-      for (int j = 0; j < n_; j++)
-        H(i, j) = A(i, j);  // as before: H is member function, A(i, j) is operator of matrix.
-
-    // Reduce to Hessenberg form.
-    Orthes();
-
-    // Reduce Hessenberg to real Schur form.
-    Hqr2();
-  }
-}
-
-template<typename Real>
-EigenvalueDecomposition<Real>::~EigenvalueDecomposition() {
-  delete [] d_;
-  delete [] e_;
-  delete [] V_;
-  delete [] H_;
-  delete [] ort_;
-}
-
-// see function MatrixBase<Real>::Eig in kaldi-matrix.cc
-
-
-} // namespace kaldi
-
-#endif // KALDI_MATRIX_JAMA_EIG_H_
--- a/speechx/speechx/kaldi/matrix/jama-svd.h
+++ b/speechx/speechx/kaldi/matrix/jama-svd.h
@ -1,531 +0,0 @@
-// matrix/jama-svd.h
-
-// Copyright 2009-2011 Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-// This file consists of a port and modification of materials from
-//   JAMA: A Java Matrix Package
-// under the following notice: This software is a cooperative product of
-// The MathWorks and the National Institute of Standards and Technology (NIST)
-// which has been released to the public.  This notice and the original code are
-// available at http://math.nist.gov/javanumerics/jama/domain.notice
-
-
-#ifndef KALDI_MATRIX_JAMA_SVD_H_
-#define KALDI_MATRIX_JAMA_SVD_H_ 1
-
-
-#include "matrix/kaldi-matrix.h"
-#include "matrix/sp-matrix.h"
-#include "matrix/cblas-wrappers.h"
-
-namespace kaldi {
-
-#if defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
-// using ATLAS as our math library, which doesn't have SVD -> need
-// to implement it.
-
-// This routine is a modified form of jama_svd.h which is part of the TNT distribution.
-// (originally comes from JAMA).
-
-/** Singular Value Decomposition.
- * <P>
- * For an m-by-n matrix A with m >= n, the singular value decomposition is
- * an m-by-n orthogonal matrix U, an n-by-n diagonal matrix S, and
- * an n-by-n orthogonal matrix V so that A = U*S*V'.
- * <P>
- * The singular values, sigma[k] = S(k, k), are ordered so that
- * sigma[0] >= sigma[1] >= ... >= sigma[n-1].
- * <P>
- * The singular value decomposition always exists, so the constructor will
- * never fail.  The matrix condition number and the effective numerical
- * rank can be computed from this decomposition.
-
- * <p>
- *     (Adapted from JAMA, a Java Matrix Library, developed by jointly
- *     by the Mathworks and NIST; see  http://math.nist.gov/javanumerics/jama).
- */
-
-
-// Computes the singular value decomposition of *this (m by n, with m >= n)
-// using the JAMA algorithm.  Destructive: *this is overwritten.
-//   s_in  [out] receives the singular values; must be non-NULL, Dim() == n.
-//   U_in  [out] optional; if non-NULL it must be m by n and must not be *this.
-//   V_in  [out] optional; if non-NULL it must be n by n and must not be *this.
-// Returns true on success, or false (after a warning) if more than 1000 QR
-// steps are taken without a singular value converging.
-template<typename Real>
-bool MatrixBase<Real>::JamaSvd(VectorBase<Real> *s_in,
-                               MatrixBase<Real> *U_in,
-                               MatrixBase<Real> *V_in) {  //  Destructive!
-  KALDI_ASSERT(s_in != NULL && U_in != this && V_in != this);
-  int wantu = (U_in != NULL), wantv = (V_in != NULL);
-  Matrix<Real> Utmp, Vtmp;
-  MatrixBase<Real> &U = (U_in ? *U_in : Utmp), &V = (V_in ? *V_in : Vtmp);
-  VectorBase<Real> &s = *s_in;
-
-  int m = num_rows_, n = num_cols_;
-  KALDI_ASSERT(m>=n && m != 0 && n != 0);
-  if (wantu) KALDI_ASSERT((int)U.num_rows_ == m && (int)U.num_cols_ == n);
-  if (wantv) KALDI_ASSERT((int)V.num_rows_ == n && (int)V.num_cols_ == n);
-  KALDI_ASSERT((int)s.Dim() == n);  // n<=m so n is min.
-
-  int nu = n;
-  U.SetZero();  // make sure all zero.
-  Vector<Real> e(n);
-  Vector<Real> work(m);
-  MatrixBase<Real> &A(*this);
-  Real *adata = A.Data(), *workdata = work.Data(), *edata = e.Data(),
-      *udata = U.Data(), *vdata = V.Data();
-  int astride = static_cast<int>(A.Stride()),
-      ustride = static_cast<int>(U.Stride()),
-      vstride = static_cast<int>(V.Stride());
-  int i = 0, j = 0, k = 0;
-
-  // Reduce A to bidiagonal form, storing the diagonal elements
-  // in s and the super-diagonal elements in e.
-
-  int nct = std::min(m-1, n);
-  int nrt = std::max(0, std::min(n-2, m));
-  for (k = 0; k < std::max(nct, nrt); k++) {
-    if (k < nct) {
-
-      // Compute the transformation for the k-th column and
-      // place the k-th diagonal in s(k).
-      // Compute 2-norm of k-th column without under/overflow.
-      s(k) = 0;
-      for (i = k; i < m; i++) {
-        s(k) = hypot(s(k), A(i, k));
-      }
-      if (s(k) != 0.0) {
-        if (A(k, k) < 0.0) {
-          s(k) = -s(k);
-        }
-        for (i = k; i < m; i++) {
-          A(i, k) /= s(k);
-        }
-        A(k, k) += 1.0;
-      }
-      s(k) = -s(k);
-    }
-    for (j = k+1; j < n; j++) {
-      if ((k < nct) && (s(k) != 0.0))  {
-
-        // Apply the transformation.
-
-        Real t = cblas_Xdot(m - k, adata + astride*k + k, astride,
-                            adata + astride*k + j, astride);
-        /*for (i = k; i < m; i++) {
-          t += adata[i*astride + k]*adata[i*astride + j];  //   A(i, k)*A(i, j); // 3
-          }*/
-        t = -t/A(k, k);
-        cblas_Xaxpy(m - k, t, adata + k*astride + k, astride,
-                    adata + k*astride + j, astride);
-        /*for (i = k; i < m; i++) {
-          adata[i*astride + j] += t*adata[i*astride + k];  // A(i, j) += t*A(i, k); // 5
-          }*/
-      }
-
-      // Place the k-th row of A into e for the
-      // subsequent calculation of the row transformation.
-
-      e(j) = A(k, j);
-    }
-    if (wantu & (k < nct)) {
-
-      // Place the transformation in U for subsequent back
-      // multiplication.
-
-      for (i = k; i < m; i++) {
-        U(i, k) = A(i, k);
-      }
-    }
-    if (k < nrt) {
-
-      // Compute the k-th row transformation and place the
-      // k-th super-diagonal in e(k).
-      // Compute 2-norm without under/overflow.
-      e(k) = 0;
-      for (i = k+1; i < n; i++) {
-        e(k) = hypot(e(k), e(i));
-      }
-      if (e(k) != 0.0) {
-        if (e(k+1) < 0.0) {
-          e(k) = -e(k);
-        }
-        for (i = k+1; i < n; i++) {
-          e(i) /= e(k);
-        }
-        e(k+1) += 1.0;
-      }
-      e(k) = -e(k);
-      if ((k+1 < m) & (e(k) != 0.0)) {
-
-        // Apply the transformation.
-
-        for (i = k+1; i < m; i++) {
-          work(i) = 0.0;
-        }
-        for (j = k+1; j < n; j++) {
-          for (i = k+1; i < m; i++) {
-            workdata[i] += edata[j] * adata[i*astride + j];  // work(i) += e(j)*A(i, j); // 5
-          }
-        }
-        for (j = k+1; j < n; j++) {
-          Real t(-e(j)/e(k+1));
-          cblas_Xaxpy(m - (k+1), t, workdata + (k+1), 1,
-                      adata + (k+1)*astride + j, astride);
-          /*
-          for (i = k+1; i < m; i++) {
-            adata[i*astride + j] += t*workdata[i];  // A(i, j) += t*work(i); // 5
-            }*/
-        }
-      }
-      if (wantv) {
-
-        // Place the transformation in V for subsequent
-        // back multiplication.
-
-        for (i = k+1; i < n; i++) {
-          V(i, k) = e(i);
-        }
-      }
-    }
-  }
-
-  // Set up the final bidiagonal matrix of order p.
-
-  int p = std::min(n, m+1);
-  if (nct < n) {
-    s(nct) = A(nct, nct);
-  }
-  if (m < p) {
-    s(p-1) = 0.0;
-  }
-  if (nrt+1 < p) {
-    e(nrt) = A(nrt, p-1);
-  }
-  e(p-1) = 0.0;
-
-  // If required, generate U.
-
-  if (wantu) {
-    for (j = nct; j < nu; j++) {
-      for (i = 0; i < m; i++) {
-        U(i, j) = 0.0;
-      }
-      U(j, j) = 1.0;
-    }
-    for (k = nct-1; k >= 0; k--) {
-      if (s(k) != 0.0) {
-        for (j = k+1; j < nu; j++) {
-          Real t = cblas_Xdot(m - k, udata + k*ustride + k, ustride, udata + k*ustride + j, ustride);
-          //for (i = k; i < m; i++) {
-          //  t += udata[i*ustride + k]*udata[i*ustride + j];  // t += U(i, k)*U(i, j); // 8
-          // }
-          t = -t/U(k, k);
-          cblas_Xaxpy(m - k, t, udata + ustride*k + k, ustride,
-                      udata + k*ustride + j, ustride);
-          /*for (i = k; i < m; i++) {
-            udata[i*ustride + j] += t*udata[i*ustride + k];  // U(i, j) += t*U(i, k); // 4
-            }*/
-        }
-        for (i = k; i < m; i++ ) {
-          U(i, k) = -U(i, k);
-        }
-        U(k, k) = 1.0 + U(k, k);
-        for (i = 0; i < k-1; i++) {
-          U(i, k) = 0.0;
-        }
-      } else {
-        for (i = 0; i < m; i++) {
-          U(i, k) = 0.0;
-        }
-        U(k, k) = 1.0;
-      }
-    }
-  }
-
-  // If required, generate V.
-
-  if (wantv) {
-    for (k = n-1; k >= 0; k--) {
-      if ((k < nrt) & (e(k) != 0.0)) {
-        for (j = k+1; j < nu; j++) {
-          Real t = cblas_Xdot(n - (k+1), vdata + (k+1)*vstride + k, vstride,
-                              vdata + (k+1)*vstride + j, vstride);
-          /*Real t (0.0);
-          for (i = k+1; i < n; i++) {
-            t += vdata[i*vstride + k]*vdata[i*vstride + j];  // t += V(i, k)*V(i, j); // 7
-            }*/
-          t = -t/V(k+1, k);
-          cblas_Xaxpy(n - (k+1), t, vdata + (k+1)*vstride + k, vstride,
-                      vdata + (k+1)*vstride + j, vstride);
-          /*for (i = k+1; i < n; i++) {
-            vdata[i*vstride + j] += t*vdata[i*vstride + k];  // V(i, j) += t*V(i, k); // 7
-            }*/
-        }
-      }
-      for (i = 0; i < n; i++) {
-        V(i, k) = 0.0;
-      }
-      V(k, k) = 1.0;
-    }
-  }
-
-  // Main iteration loop for the singular values.
-
-  int pp = p-1;
-  int iter = 0;
-  // note: -52.0 is from Jama code; the -23 is the extension
-  // to float, because mantissa length in (double, float)
-  // is (52, 23) bits respectively.
-  Real eps(pow(2.0, sizeof(Real) == 4 ? -23.0 : -52.0));
-  // Note: the -966 was taken from Jama code, but the -120 is a guess
-  // of how to extend this to float... the exponent in double goes
-  // from -1022 .. 1023, and in float from -126..127.  I'm not sure
-  // what the significance of 966 is, so -120 just represents a number
-  // that's a bit less negative than -126.  If we get convergence
-  // failure in float only, this may mean that we have to make the
-  // -120 value less negative.
-  Real tiny(pow(2.0, sizeof(Real) == 4 ? -120.0: -966.0 ));
-
-  while (p > 0) {
-    int k = 0;
-    int kase = 0;
-
-    if (iter == 500 || iter == 750) {
-      KALDI_WARN << "Svd taking a long time: making convergence criterion less exact.";
-      // Raise both thresholds to the power 0.8.  They are positive and < 1,
-      // so this makes them moderately larger.  The arguments used to be
-      // swapped (pow(0.8, eps)), which evaluates to ~1.0 and made the
-      // negligibility tests below pass for almost any value.
-      eps = pow(eps, static_cast<Real>(0.8));
-      tiny = pow(tiny, static_cast<Real>(0.8));
-    }
-    if (iter > 1000) {
-      KALDI_WARN << "Svd not converging on matrix of size " << m << " by " <<n;
-      return false;
-    }
-
-    // This section of the program inspects for
-    // negligible elements in the s and e arrays.  On
-    // completion the variables kase and k are set as follows.
-
-    // kase = 1     if s(p) and e(k-1) are negligible and k < p
-    // kase = 2     if s(k) is negligible and k < p
-    // kase = 3     if e(k-1) is negligible, k < p, and
-    //              s(k), ..., s(p) are not negligible (qr step).
-    // kase = 4     if e(p-1) is negligible (convergence).
-
-    for (k = p-2; k >= -1; k--) {
-      if (k == -1) {
-        break;
-      }
-      if (std::abs(e(k)) <=
-          tiny + eps*(std::abs(s(k)) + std::abs(s(k+1)))) {
-        e(k) = 0.0;
-        break;
-      }
-    }
-    if (k == p-2) {
-      kase = 4;
-    } else {
-      int ks;
-      for (ks = p-1; ks >= k; ks--) {
-        if (ks == k) {
-          break;
-        }
-        Real t( (ks != p ? std::abs(e(ks)) : 0.) +
-                (ks != k+1 ? std::abs(e(ks-1)) : 0.));
-        if (std::abs(s(ks)) <= tiny + eps*t)  {
-          s(ks) = 0.0;
-          break;
-        }
-      }
-      if (ks == k) {
-        kase = 3;
-      } else if (ks == p-1) {
-        kase = 1;
-      } else {
-        kase = 2;
-        k = ks;
-      }
-    }
-    k++;
-
-    // Perform the task indicated by kase.
-
-    switch (kase) {
-
-      // Deflate negligible s(p).
-
-      case 1: {
-        Real f(e(p-2));
-        e(p-2) = 0.0;
-        for (j = p-2; j >= k; j--) {
-          Real t( hypot(s(j), f));
-          Real cs(s(j)/t);
-          Real sn(f/t);
-          s(j) = t;
-          if (j != k) {
-            f = -sn*e(j-1);
-            e(j-1) = cs*e(j-1);
-          }
-          if (wantv) {
-            for (i = 0; i < n; i++) {
-              t = cs*V(i, j) + sn*V(i, p-1);
-              V(i, p-1) = -sn*V(i, j) + cs*V(i, p-1);
-              V(i, j) = t;
-            }
-          }
-        }
-      }
-        break;
-
-        // Split at negligible s(k).
-
-      case 2: {
-        Real f(e(k-1));
-        e(k-1) = 0.0;
-        for (j = k; j < p; j++) {
-          Real t(hypot(s(j), f));
-          Real cs( s(j)/t);
-          Real sn(f/t);
-          s(j) = t;
-          f = -sn*e(j);
-          e(j) = cs*e(j);
-          if (wantu) {
-            for (i = 0; i < m; i++) {
-              t = cs*U(i, j) + sn*U(i, k-1);
-              U(i, k-1) = -sn*U(i, j) + cs*U(i, k-1);
-              U(i, j) = t;
-            }
-          }
-        }
-      }
-        break;
-
-        // Perform one qr step.
-
-      case 3: {
-
-        // Calculate the shift.
-
-        Real scale = std::max(std::max(std::max(std::max(
-            std::abs(s(p-1)), std::abs(s(p-2))), std::abs(e(p-2))),
-                                       std::abs(s(k))), std::abs(e(k)));
-        Real sp = s(p-1)/scale;
-        Real spm1 = s(p-2)/scale;
-        Real epm1 = e(p-2)/scale;
-        Real sk = s(k)/scale;
-        Real ek = e(k)/scale;
-        Real b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)/2.0;
-        Real c = (sp*epm1)*(sp*epm1);
-        Real shift = 0.0;
-        if ((b != 0.0) || (c != 0.0)) {
-          shift = std::sqrt(b*b + c);
-          if (b < 0.0) {
-            shift = -shift;
-          }
-          shift = c/(b + shift);
-        }
-        Real f = (sk + sp)*(sk - sp) + shift;
-        Real g = sk*ek;
-
-        // Chase zeros.
-
-        for (j = k; j < p-1; j++) {
-          Real t = hypot(f, g);
-          Real cs = f/t;
-          Real sn = g/t;
-          if (j != k) {
-            e(j-1) = t;
-          }
-          f = cs*s(j) + sn*e(j);
-          e(j) = cs*e(j) - sn*s(j);
-          g = sn*s(j+1);
-          s(j+1) = cs*s(j+1);
-          if (wantv) {
-            cblas_Xrot(n, vdata + j, vstride, vdata + j+1, vstride, cs, sn);
-            /*for (i = 0; i < n; i++) {
-              t = cs*vdata[i*vstride + j] + sn*vdata[i*vstride + j+1];  // t = cs*V(i, j) + sn*V(i, j+1);         // 13
-              vdata[i*vstride + j+1] = -sn*vdata[i*vstride + j] + cs*vdata[i*vstride + j+1];  // V(i, j+1) = -sn*V(i, j) + cs*V(i, j+1); // 5
-              vdata[i*vstride + j] = t;  // V(i, j) = t; // 4
-              }*/
-          }
-          t = hypot(f, g);
-          cs = f/t;
-          sn = g/t;
-          s(j) = t;
-          f = cs*e(j) + sn*s(j+1);
-          s(j+1) = -sn*e(j) + cs*s(j+1);
-          g = sn*e(j+1);
-          e(j+1) = cs*e(j+1);
-          if (wantu && (j < m-1)) {
-            cblas_Xrot(m, udata + j, ustride, udata + j+1, ustride, cs, sn);
-            /*for (i = 0; i < m; i++) {
-              t = cs*udata[i*ustride + j] + sn*udata[i*ustride + j+1];  // t = cs*U(i, j) + sn*U(i, j+1); // 7
-              udata[i*ustride + j+1] = -sn*udata[i*ustride + j] +cs*udata[i*ustride + j+1];  // U(i, j+1) = -sn*U(i, j) + cs*U(i, j+1); // 8
-              udata[i*ustride + j] = t;  // U(i, j) = t; // 1
-              }*/
-          }
-        }
-        e(p-2) = f;
-        iter = iter + 1;
-      }
-        break;
-
-        // Convergence.
-
-      case 4: {
-
-        // Make the singular values positive.
-
-        if (s(k) <= 0.0) {
-          s(k) = (s(k) < 0.0 ? -s(k) : 0.0);
-          if (wantv) {
-            for (i = 0; i <= pp; i++) {
-              V(i, k) = -V(i, k);
-            }
-          }
-        }
-
-        // Order the singular values.
-
-        while (k < pp) {
-          if (s(k) >= s(k+1)) {
-            break;
-          }
-          Real t = s(k);
-          s(k) = s(k+1);
-          s(k+1) = t;
-          if (wantv && (k < n-1)) {
-            for (i = 0; i < n; i++) {
-              t = V(i, k+1); V(i, k+1) = V(i, k); V(i, k) = t;
-            }
-          }
-          if (wantu && (k < m-1)) {
-            for (i = 0; i < m; i++) {
-              t = U(i, k+1); U(i, k+1) = U(i, k); U(i, k) = t;
-            }
-          }
-          k++;
-        }
-        iter = 0;
-        p--;
-      }
-        break;
-    }
-  }
-  return true;
-}
-
-#endif // defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
-
-} // namespace kaldi
-
-#endif // KALDI_MATRIX_JAMA_SVD_H_
--- a/speechx/speechx/kaldi/matrix/kaldi-blas.h
+++ b/speechx/speechx/kaldi/matrix/kaldi-blas.h
@ -1,139 +0,0 @@
-// matrix/kaldi-blas.h
-
-// Copyright 2009-2011  Ondrej Glembek;  Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-//  http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-#ifndef KALDI_MATRIX_KALDI_BLAS_H_
-#define KALDI_MATRIX_KALDI_BLAS_H_
-
-// This file handles the #includes for BLAS, LAPACK and so on.
-// It manipulates the declarations into a common format that kaldi can handle.
-// However, the kaldi code will check whether HAVE_ATLAS is defined as that
-// code is called a bit differently from CLAPACK that comes from other sources.
-
-// There are four alternatives:
-//   (i) you have ATLAS, which includes the ATLAS implementation of CBLAS
-//   plus a subset of CLAPACK (but with clapack_ in the function declarations).
-//   In this case, define HAVE_ATLAS and make sure the relevant directories are
-//   in the include path.
-
-//   (ii) you have CBLAS (some implementation thereof) plus CLAPACK.
-//   In this case, define HAVE_CLAPACK.
-//   [Since CLAPACK depends on BLAS, the presence of BLAS is implicit].
-
-//  (iii) you have the MKL library, which includes CLAPACK and CBLAS.
-//   In this case, define HAVE_MKL.
-
-//   (iv) you have OpenBLAS, which supplies cblas.h and lapacke.h.
-//   This is selected by HAVE_OPENBLAS, which this file defines
-//   unconditionally below.
-
-// Note that if we are using ATLAS, no Svd implementation is supplied,
-// so we define HAVE_Svd to be zero and this directs our implementation to
-// supply its own "by hand" implementation which is based on TNT code.
-
-
-
-#define HAVE_OPENBLAS
-
-#if (defined(HAVE_CLAPACK) && (defined(HAVE_ATLAS) || defined(HAVE_MKL))) \
-    || (defined(HAVE_ATLAS) && defined(HAVE_MKL))
-#error "Do not define more than one of HAVE_CLAPACK, HAVE_ATLAS and HAVE_MKL"
-#endif
-
-#ifdef HAVE_ATLAS
-  extern "C" {
-    #include "cblas.h"
-    #include "clapack.h"
-  }
-#elif defined(HAVE_CLAPACK)
-  #ifdef __APPLE__
-    #ifndef __has_extension
-    #define __has_extension(x) 0
-    #endif
-    #define vImage_Utilities_h
-    #define vImage_CVUtilities_h
-    #include <Accelerate/Accelerate.h>
-    typedef __CLPK_integer          integer;
-    typedef __CLPK_logical          logical;
-    typedef __CLPK_real             real;
-    typedef __CLPK_doublereal       doublereal;
-    typedef __CLPK_complex          complex;
-    typedef __CLPK_doublecomplex    doublecomplex;
-    typedef __CLPK_ftnlen           ftnlen;
-  #else
-    extern "C" {
-      // May be in /usr/[local]/include if installed; else this uses the one
-      // from the tools/CLAPACK_include directory.
-      #include <cblas.h>
-      #include <f2c.h>
-      #include <clapack.h>
-
-      // get rid of macros from f2c.h -- these are dangerous.
-      #undef abs
-      #undef dabs
-      #undef min
-      #undef max
-      #undef dmin
-      #undef dmax
-      #undef bit_test
-      #undef bit_clear
-      #undef bit_set
-    }
-  #endif
-#elif defined(HAVE_MKL)
-  extern "C" {
-    #include <mkl.h>
-  }
-#elif defined(HAVE_OPENBLAS)
-  // getting cblas.h and lapacke.h from <openblas-install-dir>/.
-  // putting in "" not <> to search -I before system libraries.
-  #if defined(_MSC_VER)
-    #include <complex.h>
-    #define LAPACK_COMPLEX_CUSTOM
-    #define lapack_complex_float _Fcomplex
-    #define lapack_complex_double _Dcomplex
-  #endif
-  #include "cblas.h"
-  #include "lapacke.h"
-  #undef I
-  #undef complex
-  // get rid of macros from f2c.h -- these are dangerous.
-  #undef abs
-  #undef dabs
-  #undef min
-  #undef max
-  #undef dmin
-  #undef dmax
-  #undef bit_test
-  #undef bit_clear
-  #undef bit_set
-#else
-  #error "You need to define (using the preprocessor) either HAVE_CLAPACK or HAVE_ATLAS or HAVE_MKL (but not more than one)"
-#endif
-
-#ifdef HAVE_OPENBLAS
-typedef int KaldiBlasInt; // try int.
-#endif
-#ifdef HAVE_CLAPACK
-typedef integer KaldiBlasInt;
-#endif
-#ifdef HAVE_MKL
-typedef MKL_INT KaldiBlasInt;
-#endif
-
-#ifdef HAVE_ATLAS
-// in this case there is no need for KaldiBlasInt-- this typedef is only needed
-// for Svd code which is not included in ATLAS (we re-implement it).
-#endif
-
-
-#endif  // KALDI_MATRIX_KALDI_BLAS_H_
--- a/speechx/speechx/kaldi/matrix/kaldi-vector.h
+++ b/speechx/speechx/kaldi/matrix/kaldi-vector.h
@ -1,612 +0,0 @@
-// matrix/kaldi-vector.h
-
-// Copyright 2009-2012   Ondrej Glembek;  Microsoft Corporation;  Lukas Burget;
-//                       Saarland University (Author: Arnab Ghoshal);
-//                       Ariya Rastrow;  Petr Schwarz;  Yanmin Qian;
-//                       Karel Vesely;  Go Vivace Inc.;  Arnab Ghoshal
-//                       Wei Shi;
-//                2015   Guoguo Chen
-//                2017   Daniel Galvez
-//                2019   Yiwen Shao
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_MATRIX_KALDI_VECTOR_H_
-#define KALDI_MATRIX_KALDI_VECTOR_H_ 1
-
-#include "matrix/matrix-common.h"
-
-namespace kaldi {
-
-/// \addtogroup matrix_group
-/// @{
-
-///  Provides a vector abstraction class.
-///  This class provides a way to work with vectors in kaldi.
-///  It encapsulates basic operations and memory optimizations.
-template<typename Real>
-class VectorBase {
- public:
-  /// Set vector to all zeros.
-  void SetZero();
-
-  /// Returns true if vector is all zeros.
-  bool IsZero(Real cutoff = 1.0e-06) const;     // replace magic number
-
-  /// Set all members of a vector to a specified value.
-  void Set(Real f);
-
-  /// Set vector to random normally-distributed noise.
-  void SetRandn();
-
-  /// Sets to numbers uniformly distributed on (0,1)
-  void SetRandUniform();
-
-  /// This function returns a random index into this vector,
-  /// chosen with probability proportional to the corresponding
-  /// element.  Requires that this->Min() >= 0 and this->Sum() > 0.
-  MatrixIndexT RandCategorical() const;
-
-  /// Returns the  dimension of the vector.
-  inline MatrixIndexT Dim() const { return dim_; }
-
-  /// Returns the size in memory of the vector, in bytes.
-  inline MatrixIndexT SizeInBytes() const { return (dim_*sizeof(Real)); }
-
-  /// Returns a pointer to the start of the vector's data.
-  inline Real* Data() { return data_; }
-
-  /// Returns a pointer to the start of the vector's data (const).
-  inline const Real* Data() const { return data_; }
-
-  /// Indexing  operator (const).
-  inline Real operator() (MatrixIndexT i) const {
-    KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
-                 static_cast<UnsignedMatrixIndexT>(dim_));
-    return *(data_ + i);
-  }
-
-  /// Indexing operator (non-const).
-  inline Real & operator() (MatrixIndexT i) {
-    KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
-                 static_cast<UnsignedMatrixIndexT>(dim_));
-    return *(data_ + i);
-  }
-
-  /** @brief Returns a sub-vector of a vector (a range of elements).
-   *  @param o [in] Origin, 0 < o < Dim()
-   *  @param l [in] Length 0 < l < Dim()-o
-   *  @return A SubVector object that aliases the data of the Vector object.
-   *  See @c SubVector class for details   */
-  SubVector<Real> Range(const MatrixIndexT o, const MatrixIndexT l) {
-    return SubVector<Real>(*this, o, l);
-  }
-
-  /** @brief Returns a const sub-vector of a vector (a range of elements).
-   *  @param o [in] Origin, 0 < o < Dim()
-   *  @param l [in] Length 0 < l < Dim()-o
-   *  @return A SubVector object that aliases the data of the Vector object.
-   *  See @c SubVector class for details   */
-  const SubVector<Real> Range(const MatrixIndexT o,
-                              const MatrixIndexT l) const {
-    return SubVector<Real>(*this, o, l);
-  }
-
-  /// Copy data from another vector (must match own size).
-  void CopyFromVec(const VectorBase<Real> &v);
-
-  /// Copy data from a SpMatrix or TpMatrix (must match own size).
-  template<typename OtherReal>
-  void CopyFromPacked(const PackedMatrix<OtherReal> &M);
-
-  /// Copy data from another vector of different type (double vs. float)
-  template<typename OtherReal>
-  void CopyFromVec(const VectorBase<OtherReal> &v);
-
-  /// Copy from CuVector.  This is defined in ../cudamatrix/cu-vector.h
-  template<typename OtherReal>
-  void CopyFromVec(const CuVectorBase<OtherReal> &v);
-
-  /// Applies floor to all elements. Returns number of elements
-  /// floored in floored_count if it is non-null.
-  void Floor(const VectorBase<Real> &v, Real floor_val, MatrixIndexT *floored_count = nullptr);
-
-  /// Applies ceiling to all elements. Returns number of elements
-  /// changed in ceiled_count if it is non-null.
-  void Ceiling(const VectorBase<Real> &v, Real ceil_val, MatrixIndexT *ceiled_count = nullptr);
-
-  void Pow(const VectorBase<Real> &v, Real power);
-
-  /// Apply natural log to all elements.  Throw if any element of
-  /// the vector is negative (but doesn't complain about zero; the
-  /// log will be -infinity
-  void ApplyLog();
-
-  /// Apply natural log to another vector and put result in *this.
-  void ApplyLogAndCopy(const VectorBase<Real> &v);
-
-  /// Apply exponential to each value in vector.
-  void ApplyExp();
-
-  /// Take absolute value of each of the elements
-  void ApplyAbs();
-
-  /// Applies floor to all elements. Returns number of elements
-  /// floored in floored_count if it is non-null.
-  inline void ApplyFloor(Real floor_val, MatrixIndexT *floored_count = nullptr) {
-    this->Floor(*this, floor_val, floored_count);
-  };
-
-  /// Applies ceiling to all elements. Returns number of elements
-  /// changed in ceiled_count if it is non-null.
-  inline void ApplyCeiling(Real ceil_val, MatrixIndexT *ceiled_count = nullptr) {
-    this->Ceiling(*this, ceil_val, ceiled_count);
-  };
-
-  /// Applies floor to all elements. Returns number of elements floored.
-  MatrixIndexT ApplyFloor(const VectorBase<Real> &floor_vec);
-
-  /// Apply soft-max to vector and return normalizer (log sum of exponentials).
-  /// This is the same as: \f$ x(i) = exp(x(i)) / \sum_i exp(x(i)) \f$
-  Real ApplySoftMax();
-
-  /// Applies log soft-max to vector and returns normalizer (log sum of
-  /// exponentials).
-  /// This is the same as: \f$ x(i) = x(i) - log(\sum_i exp(x(i))) \f$
-  Real ApplyLogSoftMax();
-
-  /// Sets each element of *this to the tanh of the corresponding element of "src".
-  void Tanh(const VectorBase<Real> &src);
-
-  /// Sets each element of *this to the sigmoid function of the corresponding
-  /// element of "src".
-  void Sigmoid(const VectorBase<Real> &src);
-
-  /// Take all  elements of vector to a power.
-  inline void ApplyPow(Real power) {
-    this->Pow(*this, power);
-  };
-
-  /// Take the absolute value of all elements of a vector to a power.
-  /// Include the sign of the input element if include_sign == true.
-  /// If power is negative and the input value is zero, the output is set zero.
-  void ApplyPowAbs(Real power, bool include_sign=false);
-
-  /// Compute the p-th norm of the vector.
-  Real Norm(Real p) const;
-
-  /// Returns true if ((*this)-other).Norm(2.0) <= tol * (*this).Norm(2.0).
-  bool ApproxEqual(const VectorBase<Real> &other, float tol = 0.01) const;
-
-  /// Invert all elements.
-  void InvertElements();
-
-  /// Add vector : *this = *this + alpha * rv (with casting between floats and
-  /// doubles)
-  template<typename OtherReal>
-  void AddVec(const Real alpha, const VectorBase<OtherReal> &v);
-
-  /// Add vector : *this = *this + alpha * rv^2  [element-wise squaring].
-  void AddVec2(const Real alpha, const VectorBase<Real> &v);
-
-  /// Add vector : *this = *this + alpha * rv^2  [element-wise squaring],
-  /// with casting between floats and doubles.
-  template<typename OtherReal>
-  void AddVec2(const Real alpha, const VectorBase<OtherReal> &v);
-
-  /// Add matrix times vector : this <-- beta*this + alpha*M*v.
-  /// Calls BLAS GEMV.
-  void AddMatVec(const Real alpha, const MatrixBase<Real> &M,
-                 const MatrixTransposeType trans,  const VectorBase<Real> &v,
-                 const Real beta); // **beta previously defaulted to 0.0**
-
-  /// This is as AddMatVec, except optimized for where v contains a lot
-  /// of zeros.
-  void AddMatSvec(const Real alpha, const MatrixBase<Real> &M,
-                  const MatrixTransposeType trans,  const VectorBase<Real> &v,
-                  const Real beta); // **beta previously defaulted to 0.0**
-
-
-  /// Add symmetric positive definite matrix times vector:
-  ///  this <-- beta*this + alpha*M*v.   Calls BLAS SPMV.
-  void AddSpVec(const Real alpha, const SpMatrix<Real> &M,
-                const VectorBase<Real> &v, const Real beta);  // **beta previously defaulted to 0.0**
-
-  /// Add triangular matrix times vector: this <-- beta*this + alpha*M*v.
-  /// Works even if rv == *this.
-  void AddTpVec(const Real alpha, const TpMatrix<Real> &M,
-                const MatrixTransposeType trans, const VectorBase<Real> &v,
-                const Real beta);  // **beta previously defaulted to 0.0**
-
-  /// Set each element to y = (x == orig ? changed : x).
-  void ReplaceValue(Real orig, Real changed);
-
-  /// Multiply element-by-element by another vector.
-  void MulElements(const VectorBase<Real> &v);
-  /// Multiply element-by-element by another vector of different type.
-  template<typename OtherReal>
-  void MulElements(const VectorBase<OtherReal> &v);
-
-  /// Divide element-by-element by a vector.
-  void DivElements(const VectorBase<Real> &v);
-  /// Divide element-by-element by a vector of different type.
-  template<typename OtherReal>
-  void DivElements(const VectorBase<OtherReal> &v);
-
-  /// Add a constant to each element of a vector.
-  void Add(Real c);
-
-  /// Add element-by-element product of vectors:
-  //  this <-- alpha * v .* r + beta*this .
-  void AddVecVec(Real alpha, const VectorBase<Real> &v,
-                 const VectorBase<Real> &r, Real beta);
-
-  /// Add element-by-element quotient of two vectors.
-  ///  this <---- alpha*v/r + beta*this
-  void AddVecDivVec(Real alpha, const VectorBase<Real> &v,
-                    const VectorBase<Real> &r, Real beta);
-
-  /// Multiplies all elements by this constant.
-  void Scale(Real alpha);
-
-  /// Multiplies this vector by lower-triangular matrix:  *this <-- *this *M
-  void MulTp(const TpMatrix<Real> &M, const MatrixTransposeType trans);
-
-  /// If trans == kNoTrans, solves M x = b, where b is the value of *this at input
-  /// and x is the value of *this at output.
-  /// If trans == kTrans, solves M' x = b.
-  /// Does not test for M being singular or near-singular, so test it before
-  /// calling this routine.
-  void Solve(const TpMatrix<Real> &M, const MatrixTransposeType trans);
-
-  /// Performs a row stack of the matrix M
-  void CopyRowsFromMat(const MatrixBase<Real> &M);
-  template<typename OtherReal>
-  void CopyRowsFromMat(const MatrixBase<OtherReal> &M);
-
-  /// The following is implemented in ../cudamatrix/cu-matrix.cc
-  void CopyRowsFromMat(const CuMatrixBase<Real> &M);
-
-  /// Performs a column stack of the matrix M
-  void CopyColsFromMat(const MatrixBase<Real> &M);
-
-  /// Extracts a row of the matrix M.  Could also do this with
-  /// this->Copy(M[row]).
-  void CopyRowFromMat(const MatrixBase<Real> &M, MatrixIndexT row);
-  /// Extracts a row of the matrix M with type conversion.
-  template<typename OtherReal>
-  void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row);
-
-  /// Extracts a row of the symmetric matrix S.
-  template<typename OtherReal>
-  void CopyRowFromSp(const SpMatrix<OtherReal> &S, MatrixIndexT row);
-
-  /// Extracts a column of the matrix M.
-  template<typename OtherReal>
-  void CopyColFromMat(const MatrixBase<OtherReal> &M , MatrixIndexT col);
-
-  /// Extracts the diagonal of the matrix M.
-  void CopyDiagFromMat(const MatrixBase<Real> &M);
-
-  /// Extracts the diagonal of a packed matrix M; works for Sp or Tp.
-  void CopyDiagFromPacked(const PackedMatrix<Real> &M);
-
-
-  /// Extracts the diagonal of a symmetric matrix.
-  inline void CopyDiagFromSp(const SpMatrix<Real> &M) { CopyDiagFromPacked(M); }
-
-  /// Extracts the diagonal of a triangular matrix.
-  inline void CopyDiagFromTp(const TpMatrix<Real> &M) { CopyDiagFromPacked(M); }
-
-  /// Returns the maximum value of any element, or -infinity for the empty vector.
-  Real Max() const;
-
-  /// Returns the maximum value of any element, and the associated index.
-  /// Error if vector is empty.
-  Real Max(MatrixIndexT *index) const;
-
-  /// Returns the minimum value of any element, or +infinity for the empty vector.
-  Real Min() const;
-
-  /// Returns the minimum value of any element, and the associated index.
-  /// Error if vector is empty.
-  Real Min(MatrixIndexT *index) const;
-
-  /// Returns sum of the elements
-  Real Sum() const;
-
-  /// Returns sum of the logs of the elements.  More efficient than
-  /// just taking log of each.  Will return NaN if any elements are
-  /// negative.
-  Real SumLog() const;
-
-  /// Does *this = alpha * (sum of rows of M) + beta * *this.
-  void AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta = 1.0);
-
-  /// Does *this = alpha * (sum of columns of M) + beta * *this.
-  void AddColSumMat(Real alpha, const MatrixBase<Real> &M, Real beta = 1.0);
-
-  /// Add the diagonal of a matrix times itself:
-  /// *this = diag(M M^T) +  beta * *this (if trans == kNoTrans), or
-  /// *this = diag(M^T M) +  beta * *this (if trans == kTrans).
-  void AddDiagMat2(Real alpha, const MatrixBase<Real> &M,
-                   MatrixTransposeType trans = kNoTrans, Real beta = 1.0);
-
-  /// Add the diagonal of a matrix product: *this = diag(M N), assuming the
-  /// "trans" arguments are both kNoTrans; for transpose arguments, it behaves
-  /// as you would expect.
-  void AddDiagMatMat(Real alpha, const MatrixBase<Real> &M, MatrixTransposeType transM,
-                     const MatrixBase<Real> &N, MatrixTransposeType transN,
-                     Real beta = 1.0);
-
-  /// Returns log(sum(exp())) without exp overflow
-  /// If prune > 0.0, ignores terms less than the max - prune.
-  /// [Note: in future, if prune = 0.0, it will take the max.
-  /// For now, use -1 if you don't want it to prune.]
-  Real LogSumExp(Real prune = -1.0) const;
-
-  /// Reads from C++ stream (option to add to existing contents).
-  /// Throws exception on failure
-  void Read(std::istream &in, bool binary, bool add = false);
-
-  /// Writes to C++ stream (option to write in binary).
-  void Write(std::ostream &Out, bool binary) const;
-
-  friend class VectorBase<double>;
-  friend class VectorBase<float>;
-  friend class CuVectorBase<Real>;
-  friend class CuVector<Real>;
- protected:
-  /// Destructor;  does not deallocate memory, this is handled by child classes.
-  /// This destructor is protected so this object can only be
-  /// deleted via a child.
-  ~VectorBase() {}
-
-  /// Empty initializer, corresponds to vector of zero size.
-  explicit VectorBase(): data_(NULL), dim_(0) {
-    KALDI_ASSERT_IS_FLOATING_TYPE(Real);
-  }
-
-// Took this out since it is not currently used, and it is possible to create
-// objects where the allocated memory is not the same size as dim_ : Arnab
-//  /// Initializer from a pointer and a size; keeps the pointer internally
-//  /// (ownership or non-ownership depends on the child class).
-//  explicit VectorBase(Real* data, MatrixIndexT dim)
-//      : data_(data), dim_(dim) {}
-
-  // Arnab : made this protected since it is unsafe too.
-  /// Load data into the vector: sz must match own size.
-  void CopyFromPtr(const Real* Data, MatrixIndexT sz);
-
-  /// data memory area
-  Real* data_;
-  /// dimension of vector
-  MatrixIndexT dim_;
-  KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase);
-}; // class VectorBase
-
-/** @brief A class representing a vector.
- *
- *  This class provides a way to work with vectors in kaldi.
- *  It encapsulates basic operations and memory optimizations.  */
-template<typename Real>
-class Vector: public VectorBase<Real> {
- public:
-  /// Constructor that takes no arguments.  Initializes to empty.
-  Vector(): VectorBase<Real>() {}
-
-  /// Constructor with specific size.  Sets to all-zero by default
-  /// if set_zero == false, memory contents are undefined.
-  explicit Vector(const MatrixIndexT s,
-                  MatrixResizeType resize_type = kSetZero)
-      : VectorBase<Real>() {  Resize(s, resize_type);  }
-
-  /// Copy constructor from CUDA vector
-  /// This is defined in ../cudamatrix/cu-vector.h
-  template<typename OtherReal>
-  explicit Vector(const CuVectorBase<OtherReal> &cu);
-
-  /// Copy constructor.  The need for this is controversial.
-  Vector(const Vector<Real> &v) : VectorBase<Real>()  { //  (cannot be explicit)
-    Resize(v.Dim(), kUndefined);
-    this->CopyFromVec(v);
-  }
-
-  /// Copy-constructor from base-class, needed to copy from SubVector.
-  explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() {
-    Resize(v.Dim(), kUndefined);
-    this->CopyFromVec(v);
-  }
-
-  /// Type conversion constructor.
-  template<typename OtherReal>
-  explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() {
-    Resize(v.Dim(), kUndefined);
-    this->CopyFromVec(v);
-  }
-
-// Took this out since it is unsafe : Arnab
-//  /// Constructor from a pointer and a size; copies the data to a location
-//  /// it owns.
-//  Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() {
-//    Resize(s);
-  //    CopyFromPtr(Data, s);
-//  }
-
-
-  /// Swaps the contents of *this and *other.  Shallow swap.
-  void Swap(Vector<Real> *other);
-
-  /// Destructor.  Deallocates memory.
-  ~Vector() { Destroy(); }
-
-  /// Read function using C++ streams.  Can also add to existing contents
-  /// of matrix.
-  void Read(std::istream &in, bool binary, bool add = false);
-
-  /// Set vector to a specified size (can be zero).
-  /// The value of the new data depends on resize_type:
-  ///   -if kSetZero, the new data will be zero
-  ///   -if kUndefined, the new data will be undefined
-  ///   -if kCopyData, the new data will be the same as the old data in any
-  ///      shared positions, and zero elsewhere.
-  /// This function takes time proportional to the number of data elements.
-  void Resize(MatrixIndexT length, MatrixResizeType resize_type = kSetZero);
-
-  /// Remove one element and shifts later elements down.
-  void RemoveElement(MatrixIndexT i);
-
-  /// Assignment operator.
-  Vector<Real> &operator = (const Vector<Real> &other) {
-    Resize(other.Dim(), kUndefined);
-    this->CopyFromVec(other);
-    return *this;
-  }
-
-  /// Assignment operator that takes VectorBase.
-  Vector<Real> &operator = (const VectorBase<Real> &other) {
-    Resize(other.Dim(), kUndefined);
-    this->CopyFromVec(other);
-    return *this;
-  }
- private:
-  /// Init assumes the current contents of the class are invalid (i.e. junk or
-  /// has already been freed), and it sets the vector to newly allocated memory
-  /// with the specified dimension.  dim == 0 is acceptable.  The memory contents
-  /// pointed to by data_ will be undefined.
-  void Init(const MatrixIndexT dim);
-
-  /// Destroy function, called internally.
-  void Destroy();
-
-};
-
-
-/// Represents a non-allocating general vector which can be defined
-/// as a sub-vector of higher-level vector [or as the row of a matrix].
-template<typename Real>
-class SubVector : public VectorBase<Real> {
- public:
-  /// Constructor from a Vector or SubVector.
-  /// SubVectors are not const-safe and it's very hard to make them
-  /// so for now we just give up.  This function contains const_cast.
-  SubVector(const VectorBase<Real> &t, const MatrixIndexT origin,
-            const MatrixIndexT length) : VectorBase<Real>() {
-    // following assert equiv to origin>=0 && length>=0 &&
-    // origin+length <= rt.dim_
-    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin)+
-                 static_cast<UnsignedMatrixIndexT>(length) <=
-                 static_cast<UnsignedMatrixIndexT>(t.Dim()));
-    VectorBase<Real>::data_ = const_cast<Real*> (t.Data()+origin);
-    VectorBase<Real>::dim_   = length;
-  }
-
-  /// This constructor initializes the vector to point at the contents
-  /// of this packed matrix (SpMatrix or TpMatrix).
-  SubVector(const PackedMatrix<Real> &M) {
-    VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
-    VectorBase<Real>::dim_   = (M.NumRows()*(M.NumRows()+1))/2;
-  }
-
-  /// Copy constructor
-  SubVector(const SubVector &other) : VectorBase<Real> () {
-    // this copy constructor needed for Range() to work in base class.
-    VectorBase<Real>::data_ = other.data_;
-    VectorBase<Real>::dim_ = other.dim_;
-  }
-
-  /// Constructor from a pointer to memory and a length.  Keeps a pointer
-  /// to the data but does not take ownership (will never delete).
-  /// Caution: this constructor enables you to evade const constraints.
-  SubVector(const Real *data, MatrixIndexT length) : VectorBase<Real> () {
-    VectorBase<Real>::data_ = const_cast<Real*>(data);
-    VectorBase<Real>::dim_   = length;
-  }
-
-  /// This operation does not preserve const-ness, so be careful.
-  SubVector(const MatrixBase<Real> &matrix, MatrixIndexT row) {
-    VectorBase<Real>::data_ = const_cast<Real*>(matrix.RowData(row));
-    VectorBase<Real>::dim_   = matrix.NumCols();
-  }
-
-  ~SubVector() {}  ///< Destructor (does nothing; no pointers are owned here).
-
- private:
-  /// Disallow assignment operator.
-  SubVector & operator = (const SubVector &other) {}
-};
-
-/// @} end of "addtogroup matrix_group"
-/// \addtogroup matrix_funcs_io
-/// @{
-/// Output to a C++ stream.  Non-binary by default (use Write for
-/// binary output).
-template<typename Real>
-std::ostream & operator << (std::ostream & out, const VectorBase<Real> & v);
-
-/// Input from a C++ stream.  Will automatically read text or
-/// binary data from the stream.
-template<typename Real>
-std::istream & operator >> (std::istream & in, VectorBase<Real> & v);
-
-/// Input from a C++ stream. Will automatically read text or
-/// binary data from the stream.
-template<typename Real>
-std::istream & operator >> (std::istream & in, Vector<Real> & v);
-/// @} end of \addtogroup matrix_funcs_io
-
-/// \addtogroup matrix_funcs_scalar
-/// @{
-
-
-template<typename Real>
-bool ApproxEqual(const VectorBase<Real> &a,
-                 const VectorBase<Real> &b, Real tol = 0.01) {
-  return a.ApproxEqual(b, tol);
-}
-
-template<typename Real>
-inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
-                        float tol = 0.01) {
-  KALDI_ASSERT(a.ApproxEqual(b, tol));
-}
-
-
-/// Returns dot product between v1 and v2.
-template<typename Real>
-Real VecVec(const VectorBase<Real> &v1, const VectorBase<Real> &v2);
-
-template<typename Real, typename OtherReal>
-Real VecVec(const VectorBase<Real> &v1, const VectorBase<OtherReal> &v2);
-
-
-/// Returns \f$ v_1^T M v_2  \f$ .
-/// Not as efficient as it could be where v1 == v2.
-template<typename Real>
-Real VecMatVec(const VectorBase<Real> &v1, const MatrixBase<Real> &M,
-               const VectorBase<Real> &v2);
-
-/// @} End of "addtogroup matrix_funcs_scalar"
-
-
-}  // namespace kaldi
-
-// we need to include the implementation
-#include "matrix/kaldi-vector-inl.h"
-
-
-
-#endif  // KALDI_MATRIX_KALDI_VECTOR_H_
--- a/speechx/speechx/kaldi/matrix/matrix-functions-inl.h
+++ b/speechx/speechx/kaldi/matrix/matrix-functions-inl.h
@ -1,56 +0,0 @@
-// matrix/matrix-functions-inl.h
-
-// Copyright 2009-2011 Microsoft Corporation
-//
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-//
-// (*) incorporates, with permission, FFT code from his book
-// "Signal Processing with Lapped Transforms", Artech, 1992.
-
-
-
-#ifndef KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
-#define KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
-
-namespace kaldi {
-
-//! ComplexMul implements, inline, the complex multiplication b *= a.
-template<typename Real> inline void ComplexMul(const Real &a_re, const Real &a_im,
-                                            Real *b_re, Real *b_im) {
-  Real tmp_re = (*b_re * a_re) - (*b_im * a_im);
-  *b_im = *b_re * a_im + *b_im * a_re;
-  *b_re = tmp_re;
-}
-
-template<typename Real> inline void ComplexAddProduct(const Real &a_re, const Real &a_im,
-                                                   const Real &b_re, const Real &b_im,
-                                                   Real *c_re, Real *c_im) {
-  *c_re += b_re*a_re - b_im*a_im;
-  *c_im += b_re*a_im + b_im*a_re;
-}
-
-
-template<typename Real> inline void ComplexImExp(Real x, Real *a_re, Real *a_im) {
-  *a_re = std::cos(x);
-  *a_im = std::sin(x);
-}
-
-
-} // end namespace kaldi
-
-
-#endif // KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
-
--- a/speechx/speechx/kaldi/matrix/matrix-functions.cc
+++ b/speechx/speechx/kaldi/matrix/matrix-functions.cc
@ -1,773 +0,0 @@
-// matrix/matrix-functions.cc
-
-// Copyright 2009-2011  Microsoft Corporation;  Go Vivace Inc.;  Jan Silovsky
-//                      Yanmin Qian;  Saarland University;  Johns Hopkins University (Author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-//
-// (*) incorporates, with permission, FFT code from his book
-// "Signal Processing with Lapped Transforms", Artech, 1992.
-
-#include "matrix/matrix-functions.h"
-#include "matrix/sp-matrix.h"
-
-namespace kaldi {
-
-template<typename Real> void ComplexFt (const VectorBase<Real> &in,
-                                     VectorBase<Real> *out, bool forward) {
-  int exp_sign = (forward ? -1 : 1);
-  KALDI_ASSERT(out != NULL);
-  KALDI_ASSERT(in.Dim() == out->Dim());
-  KALDI_ASSERT(in.Dim() % 2 == 0);
-  int twoN = in.Dim(), N = twoN / 2;
-  const Real *data_in = in.Data();
-  Real *data_out = out->Data();
-
-  Real exp1N_re, exp1N_im;  //  forward -> exp(-2pi / N), backward -> exp(2pi / N).
-  Real fraction = exp_sign * M_2PI / static_cast<Real>(N);  // forward -> -2pi/N, backward->-2pi/N
-  ComplexImExp(fraction, &exp1N_re, &exp1N_im);
-
-  Real expm_re = 1.0, expm_im = 0.0;  // forward -> exp(-2pi m / N).
-
-  for (int two_m = 0; two_m < twoN; two_m+=2) {  // For each output component.
-    Real expmn_re = 1.0, expmn_im = 0.0;  // forward -> exp(-2pi m n / N).
-    Real sum_re = 0.0, sum_im = 0.0;  // complex output for index m (the sum expression)
-    for (int two_n = 0; two_n < twoN; two_n+=2) {
-      ComplexAddProduct(data_in[two_n], data_in[two_n+1],
-                        expmn_re, expmn_im,
-                        &sum_re, &sum_im);
-      ComplexMul(expm_re, expm_im, &expmn_re, &expmn_im);
-    }
-    data_out[two_m] = sum_re;
-    data_out[two_m + 1] = sum_im;
-
-
-    if (two_m % 10 == 0) {  // occasionally renew "expm" from scratch to avoid
-      // loss of precision.
-      int nextm = 1 + two_m/2;
-      Real fraction_mult = fraction * nextm;
-      ComplexImExp(fraction_mult, &expm_re, &expm_im);
-    } else {
-      ComplexMul(exp1N_re, exp1N_im, &expm_re, &expm_im);
-    }
-  }
-}
-
-template
-void ComplexFt (const VectorBase<float> &in,
-                VectorBase<float> *out, bool forward);
-template
-void ComplexFt (const VectorBase<double> &in,
-                VectorBase<double> *out, bool forward);
-
-
-#define KALDI_COMPLEXFFT_BLOCKSIZE 8192
-// This #define affects how we recurse in ComplexFftRecursive.
-// We assume that memory-caching happens on a scale at
-// least as small as this.
-
-
-//! ComplexFftRecursive is a recursive function that computes the
-//! complex FFT of size N.  The "nffts" arguments specifies how many
-//! separate FFTs to compute in parallel (we assume the data for
-//! each one is consecutive in memory).  The "forward argument"
-//! specifies whether to do the FFT (true) or IFFT (false), although
-//! note that we do not include the factor of 1/N (the user should
-//! do this if required.  The iterators factor_begin and factor_end
-//! point to the beginning and end (i.e. one past the last element)
-//! of an array of small factors of N (typically prime factors).
-//! See the comments below this code for the detailed equations
-//! of the recursion.
-
-
-template<typename Real>
-void ComplexFftRecursive (Real *data, int nffts, int N,
-                          const int *factor_begin,
-                          const int *factor_end, bool forward,
-                          Vector<Real> *tmp_vec) {
-  if (factor_begin == factor_end) {
-    KALDI_ASSERT(N == 1);
-    return;
-  }
-
-  {  // an optimization: compute in smaller blocks.
-    // this block of code could be removed and it would still work.
-    MatrixIndexT size_perblock = N * 2 * sizeof(Real);
-    if (nffts > 1 && size_perblock*nffts > KALDI_COMPLEXFFT_BLOCKSIZE) {  // can break it up...
-      // Break up into multiple blocks.  This is an optimization.  We make
-      // no progress on the FFT when we do this.
-      int block_skip = KALDI_COMPLEXFFT_BLOCKSIZE / size_perblock;  // n blocks per call
-      if (block_skip == 0) block_skip = 1;
-      if (block_skip < nffts) {
-        int blocks_left = nffts;
-        while (blocks_left > 0) {
-          int skip_now = std::min(blocks_left, block_skip);
-          ComplexFftRecursive(data, skip_now, N, factor_begin, factor_end, forward, tmp_vec);
-          blocks_left -= skip_now;
-          data += skip_now * N*2;
-        }
-        return;
-      } // else do the actual algorithm.
-    } // else do the actual algorithm.
-  }
-
-  int P = *factor_begin;
-  KALDI_ASSERT(P > 1);
-  int Q = N / P;
-
-
-  if (P > 1 && Q > 1) {  // Do the rearrangement.   C.f. eq. (8) below.  Transform
-    // (a) to (b).
-    Real *data_thisblock = data;
-    if (tmp_vec->Dim() < (MatrixIndexT)N) tmp_vec->Resize(N);
-    Real *data_tmp = tmp_vec->Data();
-    for (int thisfft = 0; thisfft < nffts; thisfft++, data_thisblock+=N*2) {
-      for (int offset = 0; offset < 2; offset++) {  // 0 == real, 1 == im.
-        for (int p = 0; p < P; p++) {
-          for (int q = 0; q < Q; q++) {
-            int aidx = q*P + p, bidx = p*Q + q;
-            data_tmp[bidx] = data_thisblock[2*aidx+offset];
-          }
-        }
-        for (int n = 0;n < P*Q;n++) data_thisblock[2*n+offset] = data_tmp[n];
-      }
-    }
-  }
-
-  {  // Recurse.
-    ComplexFftRecursive(data, nffts*P, Q, factor_begin+1, factor_end, forward, tmp_vec);
-  }
-
-  int exp_sign = (forward ? -1 : 1);
-  Real rootN_re, rootN_im;  // Nth root of unity.
-  ComplexImExp(static_cast<Real>(exp_sign * M_2PI / N), &rootN_re, &rootN_im);
-
-  Real rootP_re, rootP_im;  // Pth root of unity.
-  ComplexImExp(static_cast<Real>(exp_sign * M_2PI / P), &rootP_re, &rootP_im);
-
-  {  // Do the multiplication
-    // could avoid a bunch of complex multiplies by moving the loop over data_thisblock
-    // inside.
-    if (tmp_vec->Dim() < (MatrixIndexT)(P*2)) tmp_vec->Resize(P*2);
-    Real *temp_a = tmp_vec->Data();
-
-    Real *data_thisblock = data, *data_end = data+(N*2*nffts);
-    for (; data_thisblock != data_end; data_thisblock += N*2) {  // for each separate fft.
-      Real qd_re = 1.0, qd_im = 0.0;  // 1^(q'/N)
-      for (int qd = 0; qd < Q; qd++) {
-        Real pdQ_qd_re = qd_re, pdQ_qd_im = qd_im;  // 1^((p'Q+q') / N) == 1^((p'/P) + (q'/N))
-                                              // Initialize to q'/N, corresponding to p' == 0.
-        for (int pd = 0; pd < P; pd++) {  // pd == p'
-          {  // This is the p = 0 case of the loop below [an optimization].
-            temp_a[pd*2] = data_thisblock[qd*2];
-            temp_a[pd*2 + 1] = data_thisblock[qd*2 + 1];
-          }
-          {  // This is the p = 1 case of the loop below [an optimization]
-            // **** MOST OF THE TIME (>60% I think) gets spent here. ***
-            ComplexAddProduct(pdQ_qd_re, pdQ_qd_im,
-                              data_thisblock[(qd+Q)*2], data_thisblock[(qd+Q)*2 + 1],
-                              &(temp_a[pd*2]), &(temp_a[pd*2 + 1]));
-          }
-          if (P > 2) {
-            Real p_pdQ_qd_re = pdQ_qd_re, p_pdQ_qd_im = pdQ_qd_im;  // 1^(p(p'Q+q')/N)
-            for (int p = 2; p < P; p++) {
-              ComplexMul(pdQ_qd_re, pdQ_qd_im, &p_pdQ_qd_re, &p_pdQ_qd_im);  // p_pdQ_qd *= pdQ_qd.
-              int data_idx = p*Q + qd;
-              ComplexAddProduct(p_pdQ_qd_re, p_pdQ_qd_im,
-                                data_thisblock[data_idx*2], data_thisblock[data_idx*2 + 1],
-                                &(temp_a[pd*2]), &(temp_a[pd*2 + 1]));
-            }
-          }
-          if (pd != P-1)
-            ComplexMul(rootP_re, rootP_im, &pdQ_qd_re, &pdQ_qd_im);  // pdQ_qd *= (rootP == 1^{1/P})
-          // (using 1/P == Q/N)
-        }
-        for (int pd = 0; pd < P; pd++) {
-          data_thisblock[(pd*Q + qd)*2] = temp_a[pd*2];
-          data_thisblock[(pd*Q + qd)*2 + 1] = temp_a[pd*2 + 1];
-        }
-        ComplexMul(rootN_re, rootN_im, &qd_re, &qd_im);  // qd *= rootN.
-      }
-    }
-  }
-}
-
-/* Equations for ComplexFftRecursive.
-   We consider here one of the "nffts" separate ffts; it's just a question of
-   doing them all in parallel.  We also write all equations in terms of
-   complex math (the conversion to real arithmetic is not hard, and anyway
-   takes place inside function calls).
-
-
-   Let the input (i.e. "data" at start) be a_n, n = 0..N-1, and
-   the output (Fourier transform) be d_k, k = 0..N-1.  We use these letters because
-   there will be two intermediate variables b and c.
-   We want to compute:
-
-     d_k = \sum_n a_n 1^(kn/N)                                             (1)
-
-   where we use 1^x as shorthand for exp(-2pi x) for the forward algorithm
-   and exp(2pi x) for the backward one.
-
-   We factorize N = P Q (P small, Q usually large).
-   With p = 0..P-1 and q = 0..Q-1, and also p'=0..P-1 and q'=0..P-1, we let:
-
-    k == p'Q + q'                                                           (2)
-    n == qP + p                                                             (3)
-
-   That is, we let p, q, p', q' range over these indices and observe that this way we
-   can cover all n, k.  Expanding (1) using (2) and (3), we can write:
-
-      d_k = \sum_{p, q}  a_n 1^((p'Q+q')(qP+p)/N)
-          = \sum_{p, q}  a_n 1^(p'pQ/N) 1^(q'qP/N) 1^(q'p/N)                 (4)
-
-   using 1^(PQ/N) = 1 to get rid of the terms with PQ in them.  Rearranging (4),
-
-     d_k =  \sum_p 1^(p'pQ/N) 1^(q'p/N)  \sum_q 1^(q'qP/N) a_n              (5)
-
-   The point here is to separate the index q.  Now we can expand out the remaining
-   instances of k and n using (2) and (3):
-
-     d_(p'Q+q') =  \sum_p 1^(p'pQ/N) 1^(q'p/N)  \sum_q 1^(q'qP/N) a_(qP+p)   (6)
-
-   The expression \sum_q varies with the indices p and q'.  Let us define
-
-         C_{p, q'} =  \sum_q 1^(q'qP/N) a_(qP+p)                            (7)
-
-   Here, C_{p, q'}, viewed as a sequence in q', is just the DFT of the points
-   a_(qP+p) for q = 1..Q-1.  These points are not consecutive in memory though,
-   they jump by P each time.  Let us define b as a rearranged version of a,
-   so that
-
-         b_(pQ+q) = a_(qP+p)                                                  (8)
-
-   How to do this rearrangement in place?  In
-
-   We can rearrange (7) to be written in terms of the b's, using (8), so that
-
-         C_{p, q'} =  \sum_q 1^(q'q (P/N)) b_(pQ+q)                            (9)
-
-   Here, the sequence of C_{p, q'} over q'=0..Q-1, is just the DFT of the sequence
-   of b_(pQ) .. b_(p(Q+1)-1).  Let's arrange the C_{p, q'} in a single array in
-   memory in the same way as the b's, i.e. we define
-         c_(pQ+q') == C_{p, q'}.                                                (10)
-   Note that we could have written (10) with q in place of q', as there is only
-   one index of type q present, but q' is just a more natural variable name to use
-   since we use q' elsewhere to subscript c and C.
-
-   Rewriting (9), we have:
-         c_(pQ+q')  = \sum_q 1^(q'q (P/N)) b_(pQ+q)                            (11)
-    which is the DFT computed by the recursive call to this function [after computing
-    the b's by rearranging the a's].  From the c's we want to compute the d's.
-    Taking (6), substituting in the sum (7), and using (10) to write it as an array,
-    we have:
-         d_(p'Q+q') =  \sum_p 1^(p'pQ/N) 1^(q'p/N)  c_(pQ+q')                   (12)
-    This sum is independent for different values of q'.  Note that d overwrites c
-    in memory.  We compute this in  a direct way, using a little array of size P to
-    store the computed d values for one value of q' (we reuse the array for each value
-    of q').
-
-    So the overall picture is this:
-    We get a call to compute DFT on size N.
-
-    - If N == 1 we return (nothing to do).
-    - We factor N = P Q (typically, P is small).
-    - Using (8), we rearrange the data in memory so that we have b not a in memory
-       (this is the block "do the rearrangement").
-       The pseudocode for this is as follows.  For simplicity we use a temporary array.
-
-          for p = 0..P-1
-             for q = 0..Q-1
-                bidx = pQ + q
-                aidx = qP + p
-                tmp[bidx] = data[aidx].
-             end
-          end
-          data <-- tmp
-        else
-
-        endif
-
-
-        The reason this accomplishes (8) is that we want pQ+q and qP+p to be swapped
-        over for each p, q, and the "if m > n" is a convenient way of ensuring that
-        this swapping happens only once (otherwise it would happen twice, since pQ+q
-        and qP+p both range over the entire set of numbers 0..N-1).
-
-    - We do the DFT on the smaller block size to compute c from b (this eq eq. (11)).
-      Note that this is actually multiple DFTs, one for each value of p, but this
-      goes to the "nffts" argument of the function call, which we have ignored up to now.
-
-    -We compute eq. (12) via a loop, as follows
-         allocate temporary array e of size P.
-         For q' = 0..Q-1:
-            for p' = 0..P-1:
-               set sum to zero [this will go in e[p']]
-               for p = p..P-1:
-                  sum += 1^(p'pQ/N) 1^(q'p/N)  c_(pQ+q')
-               end
-               e[p'] = sum
-            end
-            for p' = 0..P-1:
-               d_(p'Q+q') = e[p']
-            end
-         end
-         delete temporary array e
-
-*/
-
-// This is the outer-layer calling code for ComplexFftRecursive.
-// It factorizes the dimension and then calls the FFT routine.
-template<typename Real> void ComplexFft(VectorBase<Real> *v, bool forward, Vector<Real> *tmp_in) {
-  KALDI_ASSERT(v != NULL);
-
-  if (v->Dim()<=1) return;
-  KALDI_ASSERT(v->Dim() % 2 == 0);  // complex input.
-  int N = v->Dim() / 2;
-  std::vector<int> factors;
-  Factorize(N, &factors);
-  int *factor_beg = NULL;
-  if (factors.size() > 0)
-    factor_beg = &(factors[0]);
-  Vector<Real> tmp;  // allocated in ComplexFftRecursive.
-  ComplexFftRecursive(v->Data(), 1, N, factor_beg, factor_beg+factors.size(), forward, (tmp_in?tmp_in:&tmp));
-}
-
-//! Inefficient version of Fourier transform, for testing purposes.
-template<typename Real> void RealFftInefficient (VectorBase<Real> *v, bool forward) {
-  KALDI_ASSERT(v != NULL);
-  MatrixIndexT N = v->Dim();
-  KALDI_ASSERT(N%2 == 0);
-  if (N == 0) return;
-  Vector<Real> vtmp(N*2);  // store as complex.
-  if (forward) {
-    for (MatrixIndexT i = 0; i < N; i++)  vtmp(i*2) = (*v)(i);
-    ComplexFft(&vtmp, forward);  // this is already tested so we can use this.
-    v->CopyFromVec( vtmp.Range(0, N) );
-    (*v)(1) = vtmp(N);  // Copy the N/2'th fourier component, which is real,
-    // to the imaginary part of the 1st complex output.
-  } else {
-    // reverse the transformation above to get the complex spectrum.
-    vtmp(0) = (*v)(0);  // copy F_0 which is real
-    vtmp(N) = (*v)(1);  // copy F_{N/2} which is real
-    for (MatrixIndexT i = 1; i < N/2; i++) {
-      // Copy i'th to i'th fourier component
-      vtmp(2*i) = (*v)(2*i);
-      vtmp(2*i+1) = (*v)(2*i+1);
-      // Copy i'th to N-i'th, conjugated.
-      vtmp(2*(N-i)) = (*v)(2*i);
-      vtmp(2*(N-i)+1) = -(*v)(2*i+1);
-    }
-    ComplexFft(&vtmp, forward);  // actually backward since forward == false
-    // Copy back real part.  Complex part should be zero.
-    for (MatrixIndexT i = 0; i < N; i++)
-      (*v)(i) = vtmp(i*2);
-  }
-}
-
-template void RealFftInefficient (VectorBase<float> *v, bool forward);
-template void RealFftInefficient (VectorBase<double> *v, bool forward);
-
-template
-void ComplexFft(VectorBase<float> *v, bool forward, Vector<float> *tmp_in);
-template
-void ComplexFft(VectorBase<double> *v, bool forward, Vector<double> *tmp_in);
-
-
-// See the long comment below for the math behind this.
-template<typename Real> void RealFft (VectorBase<Real> *v, bool forward) {
-  KALDI_ASSERT(v != NULL);
-  MatrixIndexT N = v->Dim(), N2 = N/2;
-  KALDI_ASSERT(N%2 == 0);
-  if (N == 0) return;
-
-  if (forward) ComplexFft(v, true);
-
-  Real *data = v->Data();
-  Real rootN_re, rootN_im;  // exp(-2pi/N), forward; exp(2pi/N), backward
-  int forward_sign = forward ? -1 : 1;
-  ComplexImExp(static_cast<Real>(M_2PI/N *forward_sign), &rootN_re, &rootN_im);
-  Real kN_re = -forward_sign, kN_im = 0.0;  // exp(-2pik/N), forward; exp(-2pik/N), backward
-  // kN starts out as 1.0 for forward algorithm but -1.0 for backward.
-  for (MatrixIndexT k = 1; 2*k <= N2; k++) {
-    ComplexMul(rootN_re, rootN_im, &kN_re, &kN_im);
-
-    Real Ck_re, Ck_im, Dk_re, Dk_im;
-    // C_k = 1/2 (B_k + B_{N/2 - k}^*) :
-    Ck_re = 0.5 * (data[2*k] + data[N - 2*k]);
-    Ck_im = 0.5 * (data[2*k + 1] - data[N - 2*k + 1]);
-    // re(D_k)= 1/2 (im(B_k) + im(B_{N/2-k})):
-    Dk_re = 0.5 * (data[2*k + 1] + data[N - 2*k + 1]);
-    // im(D_k) = -1/2 (re(B_k) - re(B_{N/2-k}))
-    Dk_im =-0.5 * (data[2*k] - data[N - 2*k]);
-    // A_k = C_k + 1^(k/N) D_k:
-    data[2*k] = Ck_re;  // A_k <-- C_k
-    data[2*k+1] = Ck_im;
-    // now A_k += D_k 1^(k/N)
-    ComplexAddProduct(Dk_re, Dk_im, kN_re, kN_im, &(data[2*k]), &(data[2*k+1]));
-
-    MatrixIndexT kdash = N2 - k;
-    if (kdash != k) {
-      // Next we handle the index k' = N/2 - k.  This is necessary
-      // to do now, to avoid invalidating data that we will later need.
-      // The quantities C_{k'} and D_{k'} are just the conjugates of C_k
-      // and D_k, so the equations are simple modifications of the above,
-      // replacing Ck_im and Dk_im with their negatives.
-      data[2*kdash] = Ck_re;  // A_k' <-- C_k'
-      data[2*kdash+1] = -Ck_im;
-      // now A_k' += D_k' 1^(k'/N)
-      // We use 1^(k'/N) = 1^((N/2 - k) / N) = 1^(1/2) 1^(-k/N) = -1 * (1^(k/N))^*
-      // so it's the same as 1^(k/N) but with the real part negated.
-      ComplexAddProduct(Dk_re, -Dk_im, -kN_re, kN_im, &(data[2*kdash]), &(data[2*kdash+1]));
-    }
-  }
-
-  {  // Now handle k = 0.
-    // In simple terms: after the complex fft, data[0] becomes the sum of real
-    // parts input[0], input[2]... and data[1] becomes the sum of imaginary
-    // parts input[1], input[3]...
-    // "zeroth" [A_0] is just the sum of input[0]+input[1]+input[2]..
-    // and "n2th" [A_{N/2}] is input[0]-input[1]+input[2]... .
-    Real zeroth = data[0] + data[1],
-        n2th = data[0] - data[1];
-    data[0] = zeroth;
-    data[1] = n2th;
-    if (!forward) {
-      data[0] /= 2;
-      data[1] /= 2;
-    }
-  }
-
-  if (!forward) {
-    ComplexFft(v, false);
-    v->Scale(2.0);  // This is so we get a factor of N increase, rather than N/2 which we would
-    // otherwise get from [ComplexFft, forward] + [ComplexFft, backward] in dimension N/2.
-    // It's for consistency with our normal FFT conventions.
-  }
-}
-
-template void RealFft (VectorBase<float> *v, bool forward);
-template void RealFft (VectorBase<double> *v, bool forward);
-
-/* Notes for real FFTs.
-   We are using the same convention as above, 1^x to mean exp(-2\pi i x) for the forward transform.
-   Actually, in a slight abuse of notation, we use this meaning for 1^x in both the forward and
-   backward cases because it's more convenient in this section.
-
-   Suppose we have real data a[0...N-1], with N even, and want to compute its Fourier transform.
-   We can make do with the first N/2 points of the transform, since the remaining ones are complex
-   conjugates of the first.  We want to compute:
-       for k = 0...N/2-1,
-       A_k = \sum_{n = 0}^{N-1}  a_n 1^(kn/N)                 (1)
-
-   We treat a[0..N-1] as a complex sequence of length N/2, i.e. a sequence b[0..N/2 - 1].
-   Viewed as sequences of length N/2, we have:
-       b = c + i d,
-   where c = a_0, a_2 ... and d = a_1, a_3 ...
-
-   We can recover the length-N/2 Fourier transforms of c and d by doing FT on b and
-   then doing the equations below.  Derivation is marked by (*) in a comment below (search
-   for it).  Let B, C, D be the FTs.
-   We have
-       C_k = 1/2 (B_k + B_{N/2 - k}^*)                                 (z0)
-       D_k =-1/2i (B_k - B_{N/2 - k}^*)                                (z1)
-so: re(D_k)= 1/2 (im(B_k) + im(B_{N/2-k}))                             (z2)
-    im(D_k) = -1/2 (re(B_k) - re(B_{N/2-k}))                             (z3)
-
-    To recover the FT A from C and D, we write, rearranging (1):
-
-       A_k = \sum_{n = 0, 2, ..., N-2} a_n 1^(kn/N)
-            +\sum_{n = 1, 3, ..., N-1} a_n 1^(kn/N)
-           = \sum_{n = 0, 1, ..., N/2-1} a_{2n} 1^(2kn/N)  + a_{2n+1} 1^(2kn/N) 1^(k/N)
-           = \sum_{n = 0, 1, ..., N/2-1} c_n 1^(2kn/N)  + d_n  1^(2kn/N) 1^(k/N)
-       A_k =  C_k + 1^(k/N) D_k                                              (a0)
-
-    This equation is valid for k = 0...N/2-1, which is the range of the sequences B_k and
-    C_k.  We don't use it for k = 0, which is a special case considered below.  For
-    0 < k < N/2, it's convenient to consider the pair k, k', where k' = N/2 - k.
-    Remember that C_k' = C_k^* and D_k' = D_k^* [where * is conjugation].  Also,
-    1^(N/2 / N) = -1.  So we have:
-       A_k' = C_k^* - 1^(k/N) D_k^*                                          (a0b)
-    We do (a0) and (a0b) together.
-
-
-
-    By symmetry this gives us the Fourier components for N/2+1, ... N, if we want
-    them.  However, it doesn't give us the value for exactly k = N/2.  For k = 0 and k = N/2, it
-    is easiest to argue directly about the meaning of the A_k, B_k and C_k in terms of
-    sums of points.
-       A_0 and A_{N/2} are both real, with A_0=\sum_n a_n, and A_{N/2} an alternating sum
-       A_{N/2} = a_0 - a_1 + a_2 ...
-     It's easy to show that
-              A_0 = C_0 + D_0            (a1)
-              A_{N/2} = C_0 - D_0.       (a2)
-     Since C_0 and D_0 are both real, C_0 is the real coefficient of B_0 and D_0 is the
-     imaginary coefficient.
-
-     *REVERSING THE PROCESS*
-
-     Next we want to reverse this process.  We just need to work out C_k and D_k from the
-     sequence A_k.  Then we do the inverse complex fft and we get back where we started.
-     For 0 and N/2, working from (a1) and (a2) above, we can see that:
-          C_0 = 1/2 (A_0 + A_{N/2})                                       (y0)
-          D_0 = 1/2 (A_0 - A_{N/2})                                       (y1)
-     and we use
-         B_0 = C_0 + i D_0
-     to get the 1st complex coefficient of B.  This is exactly the same as the forward process
-     except with an extra factor of 1/2.
-
-     Consider equations (a0) and (a0b).  We want to work out C_k and D_k from A_k and A_k'.  Remember
-     k' = N/2 - k.
-
-     Write down
-         A_k     =  C_k + 1^(k/N) D_k        (copying a0)
-         A_k'^* =   C_k - 1^(k/N) D_k       (conjugate of a0b)
-      So
-             C_k =            0.5 (A_k + A_k'^*)                    (p0)
-             D_k = 1^(-k/N) . 0.5 (A_k - A_k'^*)                    (p1)
-      Next, we want to compute B_k and B_k' from C_k and D_k.  C.f. (z0)..(z3), and remember
-      that k' = N/2-k.  We can see
-      that
-              B_k  = C_k + i D_k                                    (p2)
-              B_k' = C_k - i D_k                                    (p3)
-
-     We would like to make the equations (p0) ... (p3) look like the forward equations (z0), (z1),
-     (a0) and (a0b) so we can reuse the code.  Define E_k = -i 1^(k/N) D_k.  Then write down (p0)..(p3).
-     We have
-             C_k  =            0.5 (A_k + A_k'^*)                    (p0')
-             E_k  =       -0.5 i   (A_k - A_k'^*)                    (p1')
-             B_k  =    C_k - 1^(-k/N) E_k                            (p2')
-             B_k' =    C_k + 1^(-k/N) E_k                            (p3')
-     So these are exactly the same as (z0), (z1), (a0), (a0b) except replacing 1^(k/N) with
-     -1^(-k/N) .  Remember that we defined 1^x above to be exp(-2\pi i x), so the signs here
-     might be opposite to what you see in the code.
-
-     MODIFICATION: we need to take care of a factor of two.  The complex FFT we implemented
-     does not divide by N in the reverse case.  So upon inversion we get larger by N/2.
-     However, this is not consistent with normal FFT conventions where you get a factor of N.
-     For this reason we multiply by two after the process described above.
-
-*/
-
-
-/*
-   (*) [this token is referred to in a comment above].
-
-   Notes for separating 2 real transforms from one complex one.  Note that the
-   letters here (A, B, C and N) are all distinct from the same letters used in the
-   place where this comment is used.
-   Suppose we
-   have two sequences a_n and b_n, n = 0..N-1.  We combine them into a complex
-   number,
-      c_n = a_n + i b_n.
-   Then we take the fourier transform to get
-      C_k = \sum_{n = 0}^{N-1} c_n 1^(kn/N) .
-   Then we use symmetry.  Define A_k and B_k as the DFTs of a and b.
-   We use A_k = A_{N-k}^*, and B_k = B_{N-k}^*, since a and b are real.  Using
-      C_k     = A_k    +  i B_k,
-      C_{N-k} = A_k^*  +  i B_k^*
-              = A_k^*  -  (i B_k)^*
-   So:
-      A_k     = 1/2  (C_k + C_{N-k}^*)
-    i B_k     = 1/2  (C_k - C_{N-k}^*)
->    B_k     =-1/2i (C_k - C_{N-k}^*)
->  re(B_k)   = 1/2 (im(C_k) + im(C_{N-k}))
-    im(B_k)   =-1/2 (re(C_k) - re(C_{N-k}))
-
- */
-
-template<typename Real> void ComputeDctMatrix(Matrix<Real> *M) {
-  //KALDI_ASSERT(M->NumRows() == M->NumCols());
-  MatrixIndexT K = M->NumRows();
-  MatrixIndexT N = M->NumCols();
-
-  KALDI_ASSERT(K > 0);
-  KALDI_ASSERT(N > 0);
-  Real normalizer = std::sqrt(1.0 / static_cast<Real>(N));  // normalizer for
-  // X_0.
-  for (MatrixIndexT j = 0; j < N; j++) (*M)(0, j) = normalizer;
-  normalizer = std::sqrt(2.0 / static_cast<Real>(N));  // normalizer for other
-   // elements.
-  for (MatrixIndexT k = 1; k < K; k++)
-    for (MatrixIndexT n = 0; n < N; n++)
-      (*M)(k, n) = normalizer
-          * std::cos( static_cast<double>(M_PI)/N * (n + 0.5) * k );
-}
-
-
-template void ComputeDctMatrix(Matrix<float> *M);
-template void ComputeDctMatrix(Matrix<double> *M);
-
-
-template<typename Real>
-void ComputePca(const MatrixBase<Real> &X,
-                MatrixBase<Real> *U,
-                MatrixBase<Real> *A,
-                bool print_eigs,
-                bool exact) {
-  // Note that some of these matrices may be transposed w.r.t. the
-  // way it's most natural to describe them in math... it's the rows
-  // of X and U that correspond to the (data-points, basis elements).
-  MatrixIndexT N = X.NumRows(), D = X.NumCols();
-  // N = #points, D = feature dim.
-  KALDI_ASSERT(U != NULL && U->NumCols() == D);
-  MatrixIndexT G = U->NumRows();  // # of retained basis elements.
-  KALDI_ASSERT(A == NULL || (A->NumRows() == N && A->NumCols() == G));
-  KALDI_ASSERT(G <= N && G <= D);
-  if (D < N) {  // Do conventional PCA.
-    SpMatrix<Real> Msp(D);  // Matrix of outer products.
-    Msp.AddMat2(1.0, X, kTrans, 0.0);  // M <-- X^T X
-    Matrix<Real> Utmp;
-    Vector<Real> l;
-    if (exact) {
-      Utmp.Resize(D, D);
-      l.Resize(D);
-      //Matrix<Real> M(Msp);
-      //M.DestructiveSvd(&l, &Utmp, NULL);
-      Msp.Eig(&l, &Utmp);
-    } else {
-      Utmp.Resize(D, G);
-      l.Resize(G);
-      Msp.TopEigs(&l, &Utmp);
-    }
-    SortSvd(&l, &Utmp);
-
-    for (MatrixIndexT g = 0; g < G; g++)
-      U->Row(g).CopyColFromMat(Utmp, g);
-    if (print_eigs)
-      KALDI_LOG << (exact ? "" : "Retained ")
-                << "PCA eigenvalues are " << l;
-    if (A != NULL)
-      A->AddMatMat(1.0, X, kNoTrans, *U, kTrans, 0.0);
-  } else {  // Do inner-product PCA.
-    SpMatrix<Real> Nsp(N);  // Matrix of inner products.
-    Nsp.AddMat2(1.0, X, kNoTrans, 0.0);  // M <-- X X^T
-
-    Matrix<Real> Vtmp;
-    Vector<Real> l;
-    if (exact) {
-      Vtmp.Resize(N, N);
-      l.Resize(N);
-      Matrix<Real> Nmat(Nsp);
-      Nmat.DestructiveSvd(&l, &Vtmp, NULL);
-    } else {
-      Vtmp.Resize(N, G);
-      l.Resize(G);
-      Nsp.TopEigs(&l, &Vtmp);
-    }
-
-    MatrixIndexT num_zeroed = 0;
-    for (MatrixIndexT g = 0; g < G; g++) {
-      if (l(g) < 0.0) {
-        KALDI_WARN << "In PCA, setting element " << l(g) << " to zero.";
-        l(g) = 0.0;
-        num_zeroed++;
-      }
-    }
-    SortSvd(&l, &Vtmp); // Make sure zero elements are last, this
-    // is necessary for Orthogonalize() to work properly later.
-
-    Vtmp.Transpose();  // So eigenvalues are the rows.
-
-    for (MatrixIndexT g = 0; g < G; g++) {
-      Real sqrtlg = sqrt(l(g));
-      if (l(g) != 0.0) {
-        U->Row(g).AddMatVec(1.0 / sqrtlg, X, kTrans, Vtmp.Row(g), 0.0);
-      } else {
-        U->Row(g).SetZero();
-        (*U)(g, g) = 1.0;  // arbitrary direction.  Will later orthogonalize.
-      }
-      if (A != NULL)
-        for (MatrixIndexT n = 0; n < N; n++)
-          (*A)(n, g) = sqrtlg * Vtmp(g, n);
-    }
-    // Now orthogonalize.  This is mainly useful in
-    // case there were zero eigenvalues, but we do it
-    // for all of them.
-    U->OrthogonalizeRows();
-    if (print_eigs)
-      KALDI_LOG << "(inner-product) PCA eigenvalues are " << l;
-  }
-}
-
-
-template
-void ComputePca(const MatrixBase<float> &X,
-                MatrixBase<float> *U,
-                MatrixBase<float> *A,
-                bool print_eigs,
-                bool exact);
-
-template
-void ComputePca(const MatrixBase<double> &X,
-                MatrixBase<double> *U,
-                MatrixBase<double> *A,
-                bool print_eigs,
-                bool exact);
-
-
-// Added by Dan, Feb. 13 2012.
-// This function does: *plus += max(0, alpha a b^T),
-// *minus += max(0, -(alpha a b^T)).
-template<typename Real>
-void AddOuterProductPlusMinus(Real alpha,
-                              const VectorBase<Real> &a,
-                              const VectorBase<Real> &b,
-                              MatrixBase<Real> *plus,
-                              MatrixBase<Real> *minus) {
-  KALDI_ASSERT(a.Dim() == plus->NumRows() && b.Dim() == plus->NumCols()
-               && a.Dim() == minus->NumRows() && b.Dim() == minus->NumCols());
-  int32 nrows = a.Dim(), ncols = b.Dim(), pskip = plus->Stride() - ncols,
-      mskip = minus->Stride() - ncols;
-  const Real *adata = a.Data(), *bdata = b.Data();
-  Real *plusdata = plus->Data(), *minusdata = minus->Data();
-
-  for (int32 i = 0; i < nrows; i++) {
-    const Real *btmp = bdata;
-    Real multiple = alpha * *adata;
-    if (multiple > 0.0) {
-      for (int32 j = 0; j < ncols; j++, plusdata++, minusdata++, btmp++) {
-        if (*btmp > 0.0) *plusdata += multiple * *btmp;
-        else *minusdata -= multiple * *btmp;
-      }
-    } else {
-      for (int32 j = 0; j < ncols; j++, plusdata++, minusdata++, btmp++) {
-        if (*btmp < 0.0) *plusdata += multiple * *btmp;
-        else *minusdata -= multiple * *btmp;
-      }
-    }
-    plusdata += pskip;
-    minusdata += mskip;
-    adata++;
-  }
-}
-
-// Instantiate template
-template
-void AddOuterProductPlusMinus<float>(float alpha,
-                                     const VectorBase<float> &a,
-                                     const VectorBase<float> &b,
-                                     MatrixBase<float> *plus,
-                                     MatrixBase<float> *minus);
-template
-void AddOuterProductPlusMinus<double>(double alpha,
-                                      const VectorBase<double> &a,
-                                      const VectorBase<double> &b,
-                                      MatrixBase<double> *plus,
-                                      MatrixBase<double> *minus);
-
-
-} // end namespace kaldi
--- a/Show More
+++ b/Show More