diff --git a/speechx/README.md b/speechx/README.md index 7d73b61c..07449f9f 100644 --- a/speechx/README.md +++ b/speechx/README.md @@ -5,7 +5,7 @@ We develop under: * docker - registry.baidubce.com/paddlepaddle/paddle:2.1.1-gpu-cuda10.2-cudnn7 * os - Ubuntu 16.04.7 LTS -* gcc/g++ - 8.2.0 +* **gcc/g++/gfortran - 8.2.0** * cmake - 3.16.0 > We make sure all things work fun under docker, and recommend using it to develop and deploy. @@ -29,6 +29,8 @@ nvidia-docker run --privileged --net=host --ipc=host -it --rm -v $PWD:/workspac 2. Build `speechx` and `examples`. +> Do not `source` (activate) a Python venv before building. + ``` pushd /path/to/speechx ./build.sh diff --git a/speechx/build.sh b/speechx/build.sh index 3e9600d5..8e36d233 100755 --- a/speechx/build.sh +++ b/speechx/build.sh @@ -2,8 +2,7 @@ # the build script had verified in the paddlepaddle docker image. # please follow the instruction below to install PaddlePaddle image. -# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html - +# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html boost_SOURCE_DIR=$PWD/fc_patch/boost-src if [ ! -d ${boost_SOURCE_DIR} ]; then wget -c https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.tar.gz tar xzfv boost_1_75_0.tar.gz @@ -23,6 +22,6 @@ cd build cmake .. -DBOOST_ROOT:STRING=${boost_SOURCE_DIR} #cmake .. 
-make -j1 +make -j10 cd - diff --git a/speechx/cmake/FindGFortranLibs.cmake b/speechx/cmake/FindGFortranLibs.cmake new file mode 100644 index 00000000..763f7883 --- /dev/null +++ b/speechx/cmake/FindGFortranLibs.cmake @@ -0,0 +1,145 @@ +#.rst: +# FindGFortranLibs +# -------- +# https://github.com/Argonne-National-Laboratory/PIPS/blob/master/cmake/Modules/FindGFortranLibs.cmake +# https://enccs.github.io/cmake-workshop/cxx-fortran/ +# +# Find gcc Fortran compiler & library paths +# +# The module defines the following variables: +# +# :: +# +# +# GFORTRANLIBS_FOUND - true if system has gfortran +# LIBGFORTRAN_LIBRARIES - path to libgfortran +# LIBQUADMATH_LIBRARIES - path to libquadmath +# GFORTRAN_LIBRARIES_DIR - directory containing libgfortran, libquadmath +# GFORTRAN_INCLUDE_DIR - directory containing gfortran/gcc headers +# LIBGOMP_LIBRARIES - path to libgomp +# LIBGOMP_INCLUDE_DIR - directory containing omp.h header +# GFORTRAN_VERSION_STRING - version of gfortran found +# +set(CMAKE_REQUIRED_QUIET ${GFortranLibs_FIND_QUIETLY}) + +if(NOT CMAKE_REQUIRED_QUIET) + message(STATUS "Looking for gfortran related libraries...") +endif() + +enable_language(Fortran) +if(CMAKE_Fortran_COMPILER_ID MATCHES "GNU") + + # Basically, call "gfortran -v" to dump compiler info to the string + # GFORTRAN_VERBOSE_STR (gfortran prints it on stderr), which will be used to get necessary paths + message(STATUS "Extracting library and header information by calling 'gfortran -v'...") + execute_process(COMMAND "${CMAKE_Fortran_COMPILER}" "-v" ERROR_VARIABLE + GFORTRAN_VERBOSE_STR RESULT_VARIABLE FLAG) + + # For debugging + message(STATUS "'gfortran -v' returned:") + message(STATUS "${GFORTRAN_VERBOSE_STR}") + + # Detect gfortran version: the token following "gcc version" + string(REGEX MATCH "gcc version [^\t\n ]+" GFORTRAN_VER_STR "${GFORTRAN_VERBOSE_STR}") + string(REGEX REPLACE "gcc version ([^\t\n ]+)" "\\1" GFORTRAN_VERSION_STRING "${GFORTRAN_VER_STR}") + message(STATUS "Detected gfortran version ${GFORTRAN_VERSION_STRING}") + unset(GFORTRAN_VER_STR) + + 
set(MATCH_REGEX "[^\t\n ]+[\t\n ]+") + set(REPLACE_REGEX "([^\t\n ]+)") + + # Find architecture for compiler + string(REGEX MATCH "Target: [^\t\n ]+" + GFORTRAN_ARCH_STR "${GFORTRAN_VERBOSE_STR}") + message(STATUS "Architecture string: ${GFORTRAN_ARCH_STR}") + string(REGEX REPLACE "Target: ([^\t\n ]+)" "\\1" + GFORTRAN_ARCH "${GFORTRAN_ARCH_STR}") + message(STATUS "Detected gfortran architecture: ${GFORTRAN_ARCH}") + unset(GFORTRAN_ARCH_STR) + + # Find install prefix, if it exists; if not, use default (the match stops before trailing whitespace) + string(REGEX MATCH "--prefix=[^\t\n ]+" + GFORTRAN_PREFIX_STR "${GFORTRAN_VERBOSE_STR}") + if(NOT GFORTRAN_PREFIX_STR) + message(STATUS "Detected default gfortran prefix") + set(GFORTRAN_PREFIX_DIR "/usr/local") # default prefix for gcc install + else() + string(REGEX REPLACE "--prefix=([^\t\n ]+)" "\\1" + GFORTRAN_PREFIX_DIR "${GFORTRAN_PREFIX_STR}") + endif() + message(STATUS "Detected gfortran prefix: ${GFORTRAN_PREFIX_DIR}") + unset(GFORTRAN_PREFIX_STR) + + # Find install exec-prefix, if it exists; if not, fall back to the prefix found above + string(REGEX MATCH "--exec-prefix=[^\t\n ]+" + GFORTRAN_EXEC_PREFIX_STR "${GFORTRAN_VERBOSE_STR}") + if(NOT GFORTRAN_EXEC_PREFIX_STR) + message(STATUS "Detected default gfortran exec-prefix") + set(GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_PREFIX_DIR}") + else() + string(REGEX REPLACE "--exec-prefix=([^\t\n ]+)" "\\1" + GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_EXEC_PREFIX_STR}") + endif() + message(STATUS "Detected gfortran exec-prefix: ${GFORTRAN_EXEC_PREFIX_DIR}") + unset(GFORTRAN_EXEC_PREFIX_STR) + + # Find library directory and include directory, if library directory specified + string(REGEX MATCH "--libdir=[^\t\n ]+" + GFORTRAN_LIB_DIR_STR "${GFORTRAN_VERBOSE_STR}") + if(NOT GFORTRAN_LIB_DIR_STR) + message(STATUS "Found --libdir flag -- not found") + message(STATUS "Using default gfortran library & include directory paths") + set(GFORTRAN_LIBRARIES_DIR + 
"${GFORTRAN_EXEC_PREFIX_DIR}/lib/gcc/${GFORTRAN_ARCH}/${GFORTRAN_VERSION_STRING}") + string(CONCAT GFORTRAN_INCLUDE_DIR "${GFORTRAN_LIBRARIES_DIR}" "/include") + else() + message(STATUS "Found --libdir flag -- yes") + string(REGEX REPLACE "--libdir=([^\t\n ]+)" "\\1" + GFORTRAN_LIBRARIES_DIR "${GFORTRAN_LIB_DIR_STR}") + string(CONCAT GFORTRAN_INCLUDE_DIR "${GFORTRAN_LIBRARIES_DIR}" "/gcc/" "${GFORTRAN_ARCH}" "/" "${GFORTRAN_VERSION_STRING}" "/include") + endif() + message(STATUS "gfortran libraries path: ${GFORTRAN_LIBRARIES_DIR}") + message(STATUS "gfortran include path dir: ${GFORTRAN_INCLUDE_DIR}") + unset(GFORTRAN_LIB_DIR_STR) + + # There are lots of other build options for gcc & gfortran. For now, the + # options implemented above should cover a lot of common use cases. + + # Clean up by deleting the output string from "gfortran -v" + unset(GFORTRAN_VERBOSE_STR) + + # Find paths for libgfortran, libquadmath, libgomp + # libgomp needed for OpenMP support without Clang + find_library(LIBGFORTRAN_LIBRARIES NAMES gfortran libgfortran + HINTS ${GFORTRAN_LIBRARIES_DIR}) + find_library(LIBQUADMATH_LIBRARIES NAMES quadmath libquadmath + HINTS ${GFORTRAN_LIBRARIES_DIR}) + find_library(LIBGOMP_LIBRARIES NAMES gomp libgomp + HINTS ${GFORTRAN_LIBRARIES_DIR}) + + # Find OpenMP headers + find_path(LIBGOMP_INCLUDE_DIR NAMES omp.h HINTS ${GFORTRAN_INCLUDE_DIR}) + +else() + message(STATUS "CMAKE_Fortran_COMPILER_ID does not match 'GNU'!") +endif() + +include(FindPackageHandleStandardArgs) + +# Required: libgfortran, libquadmath, path for gfortran libraries +# Optional: libgomp, path for OpenMP headers, path for gcc/gfortran headers +find_package_handle_standard_args(GFortranLibs + REQUIRED_VARS LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES GFORTRAN_LIBRARIES_DIR + VERSION_VAR GFORTRAN_VERSION_STRING) + +if(GFORTRANLIBS_FOUND) + message(STATUS "Looking for gfortran libraries -- found") + message(STATUS "gfortran version: ${GFORTRAN_VERSION_STRING}") +else() + message(STATUS 
"Looking for gfortran libraries -- not found") +endif() + +mark_as_advanced(LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES + LIBGOMP_LIBRARIES LIBGOMP_INCLUDE_DIR + GFORTRAN_LIBRARIES_DIR GFORTRAN_INCLUDE_DIR) +# FindGFortranLibs.cmake ends here diff --git a/speechx/cmake/external/openblas.cmake b/speechx/cmake/external/openblas.cmake index 3c202f7f..5c196527 100644 --- a/speechx/cmake/external/openblas.cmake +++ b/speechx/cmake/external/openblas.cmake @@ -7,6 +7,27 @@ set(OpenBLAS_PREFIX ${fc_patch}/OpenBLAS-prefix) # OPENBLAS https://github.com/lattice/quda/blob/develop/CMakeLists.txt#L575 # ###################################################################################################################### enable_language(Fortran) + +include(FortranCInterface) + +# # Clang doesn't have a Fortran compiler in its suite (yet), +# # so detect libraries for gfortran; we need equivalents to +# # libgfortran and libquadmath, which are implicitly +# # linked by flags in CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES +# include(FindGFortranLibs REQUIRED) +# # Add directory containing libgfortran and libquadmath to +# # linker. Should also contain libgomp, if not using +# # Intel OpenMP runtime +# link_directories(${GFORTRAN_LIBRARIES_DIR}) +# # gfortran dir in the docker. 
+# link_directories(/usr/local/gcc-8.2/lib64) +# # if you are working with C and Fortran +# FortranCInterface_VERIFY() + +# # if you are working with C++ and Fortran +# FortranCInterface_VERIFY(CXX) + + #TODO: switch to CPM include(GNUInstallDirs) ExternalProject_Add( diff --git a/speechx/cmake/external/openfst.cmake b/speechx/cmake/external/openfst.cmake index 07abb18e..dc9cdff6 100644 --- a/speechx/cmake/external/openfst.cmake +++ b/speechx/cmake/external/openfst.cmake @@ -1,13 +1,14 @@ include(FetchContent) +set(openfst_PREFIX_DIR ${fc_patch}/openfst) set(openfst_SOURCE_DIR ${fc_patch}/openfst-src) set(openfst_BINARY_DIR ${fc_patch}/openfst-build) ExternalProject_Add(openfst URL https://github.com/mjansche/openfst/archive/refs/tags/1.7.2.zip URL_HASH SHA256=ffc56931025579a8af3515741c0f3b0fc3a854c023421472c07ca0c6389c75e6 -# #PREFIX ${openfst_PREFIX_DIR} -# SOURCE_DIR ${openfst_SOURCE_DIR} -# BINARY_DIR ${openfst_BINARY_DIR} + PREFIX ${openfst_PREFIX_DIR} + SOURCE_DIR ${openfst_SOURCE_DIR} + BINARY_DIR ${openfst_BINARY_DIR} CONFIGURE_COMMAND ${openfst_SOURCE_DIR}/configure --prefix=${openfst_PREFIX_DIR} "CPPFLAGS=-I${gflags_BINARY_DIR}/include -I${glog_SOURCE_DIR}/src -I${glog_BINARY_DIR}" "LDFLAGS=-L${gflags_BINARY_DIR} -L${glog_BINARY_DIR}" @@ -16,4 +17,4 @@ ExternalProject_Add(openfst BUILD_COMMAND make -j 4 ) link_directories(${openfst_PREFIX_DIR}/lib) -include_directories(${openfst_PREFIX_DIR}/include) +include_directories(${openfst_PREFIX_DIR}/include) \ No newline at end of file diff --git a/speechx/speechx/frontend/feature_cache.cc b/speechx/speechx/frontend/feature_cache.cc index d23b3a8b..dad6907c 100644 --- a/speechx/speechx/frontend/feature_cache.cc +++ b/speechx/speechx/frontend/feature_cache.cc @@ -41,6 +41,7 @@ void FeatureCache::Accept(const kaldi::VectorBase& inputs) { // pop feature chunk bool FeatureCache::Read(kaldi::Vector* feats) { kaldi::Timer timer; + std::unique_lock lock(mutex_); while (cache_.empty() && base_extractor_->IsFinished() == 
false) { ready_read_condition_.wait(lock); @@ -64,10 +65,13 @@ bool FeatureCache::Compute() { // compute and feed Vector feature_chunk; bool result = base_extractor_->Read(&feature_chunk); + std::unique_lock lock(mutex_); while (cache_.size() >= max_size_) { ready_feed_condition_.wait(lock); } + + // feed cache if (feature_chunk.Dim() != 0) { cache_.push(feature_chunk); } diff --git a/speechx/speechx/frontend/feature_cache.h b/speechx/speechx/frontend/feature_cache.h index e52d8b29..b6bbdf3c 100644 --- a/speechx/speechx/frontend/feature_cache.h +++ b/speechx/speechx/frontend/feature_cache.h @@ -24,17 +24,24 @@ class FeatureCache : public FeatureExtractorInterface { explicit FeatureCache( int32 max_size = kint16max, std::unique_ptr base_extractor = NULL); + + // Feed feats or waves virtual void Accept(const kaldi::VectorBase& inputs); + // feats dim = num_frames * feature_dim virtual bool Read(kaldi::Vector* feats); + // feature cache only cache feature which from base extractor virtual size_t Dim() const { return base_extractor_->Dim(); } + virtual void SetFinished() { base_extractor_->SetFinished(); // read the last chunk data Compute(); } + virtual bool IsFinished() const { return base_extractor_->IsFinished(); } + virtual void Reset() { base_extractor_->Reset(); while (!cache_.empty()) { @@ -45,12 +52,14 @@ class FeatureCache : public FeatureExtractorInterface { private: bool Compute(); - std::mutex mutex_; size_t max_size_; - std::queue> cache_; std::unique_ptr base_extractor_; + + std::mutex mutex_; + std::queue> cache_; std::condition_variable ready_feed_condition_; std::condition_variable ready_read_condition_; + // DISALLOW_COPY_AND_ASSGIN(FeatureCache); }; diff --git a/speechx/speechx/frontend/feature_extractor_interface.h b/speechx/speechx/frontend/feature_extractor_interface.h index 3668fbda..5da2526b 100644 --- a/speechx/speechx/frontend/feature_extractor_interface.h +++ b/speechx/speechx/frontend/feature_extractor_interface.h @@ -21,17 +21,26 @@ 
namespace ppspeech { class FeatureExtractorInterface { public: - // accept input data, accept feature or raw waves which decided - // by the base_extractor + // Feed inputs: features(2D saved in 1D) or waveforms(1D). virtual void Accept(const kaldi::VectorBase& inputs) = 0; - // get the processed result - // the length of output = feature_row * feature_dim, - // the Matrix is squashed into Vector + + // Fetch processed data: features or waveforms. + // For features(2D saved in 1D), the Matrix is squashed into Vector, + // the length of output = feature_row * feature_dim. + // For waveforms(1D), the samples are stored in the vector. virtual bool Read(kaldi::Vector* outputs) = 0; - // the Dim is the feature dim + + // Dim is the feature dim. For waveforms(1D), Dim is zero; otherwise it is + // feature-specific, e.g. 80 for fbank. virtual size_t Dim() const = 0; + + // Mark the end of the streaming data. virtual void SetFinished() = 0; + + // Whether the end of the streaming data has been reached. virtual bool IsFinished() const = 0; + + // Reset to start state. virtual void Reset() = 0; }; diff --git a/speechx/speechx/frontend/linear_spectrogram.h b/speechx/speechx/frontend/linear_spectrogram.h index ffdfbbe9..10853904 100644 --- a/speechx/speechx/frontend/linear_spectrogram.h +++ b/speechx/speechx/frontend/linear_spectrogram.h @@ -23,12 +23,14 @@ namespace ppspeech { struct LinearSpectrogramOptions { kaldi::FrameExtractionOptions frame_opts; - kaldi::BaseFloat streaming_chunk; + kaldi::BaseFloat streaming_chunk; // in seconds + LinearSpectrogramOptions() : streaming_chunk(0.36), frame_opts() {} void Register(kaldi::OptionsItf* opts) { - opts->Register( - "streaming-chunk", &streaming_chunk, "streaming chunk size"); + opts->Register("streaming-chunk", + &streaming_chunk, + "streaming chunk size, default: 0.36 sec"); frame_opts.Register(opts); } };