Merge pull request #2051 from SmileGoat/audio_dev
[audio] add kaldi fbank to paddleaudiopull/2088/head
commit
e17b7542f1
@ -0,0 +1 @@
|
|||||||
|
fc_patch/
|
@ -0,0 +1,39 @@
|
|||||||
|
# Top-level build script for the paddleaudio native extension.
# It wires up the helper modules under cmake/, fetches OpenBLAS and pybind11,
# and then builds the bundled Kaldi sources (third_party) and the Python
# bindings (csrc).
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)

project(paddleaudio VERSION 0.1)

# Index of a "-std=c++" flag inside the user's CXX flags (-1 when absent).
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)

# cmake dir
set(paddleaudio_cmake_dir ${PROJECT_SOURCE_DIR}/cmake)

# Modules
list(APPEND CMAKE_MODULE_PATH ${paddleaudio_cmake_dir}/external)
list(APPEND CMAKE_MODULE_PATH ${paddleaudio_cmake_dir})
include(FetchContent)
include(ExternalProject)

# fc_patch dir: all fetched third-party sources are kept next to this file.
# PROJECT_SOURCE_DIR (rather than CMAKE_SOURCE_DIR) keeps the location stable
# when this project is pulled in from a parent build.
set(FETCHCONTENT_QUIET off)
get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${PROJECT_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_patch})

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -fPIC -O0 -Wall -g")
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_C_STANDARD 11)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
option(BUILD_SOX "Build libsox statically" ON)

# checkout the thirdparty/kaldi/base/kaldi-types.h
# compile kaldi without openfst
add_definitions("-DCOMPILE_WITHOUT_OPENFST")

include(openblas)
include(pybind)

include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/third_party/kaldi)

# Python headers are needed by every target that includes pybind11, not only by
# the extension module itself. Discover them instead of hard-coding one
# distribution's path.
# NOTE(review): PYTHON_INCLUDE_DIRS is expected to be exported by pybind11's
# classic Python detection -- confirm; the FindPython3 branch covers the case
# where it is not set.
if(PYTHON_INCLUDE_DIRS)
  include_directories(${PYTHON_INCLUDE_DIRS})
else()
  find_package(Python3 COMPONENTS Development REQUIRED)
  include_directories(${Python3_INCLUDE_DIRS})
endif()

add_subdirectory(third_party)
add_subdirectory(csrc)
|
@ -0,0 +1,145 @@
|
|||||||
|
#.rst:
|
||||||
|
# FindGFortranLibs
|
||||||
|
# --------
|
||||||
|
# https://github.com/Argonne-National-Laboratory/PIPS/blob/master/cmake/Modules/FindGFortranLibs.cmake
|
||||||
|
# https://enccs.github.io/cmake-workshop/cxx-fortran/
|
||||||
|
#
|
||||||
|
# Find gcc Fortran compiler & library paths
|
||||||
|
#
|
||||||
|
# The module defines the following variables:
|
||||||
|
#
|
||||||
|
# ::
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# GFORTRANLIBS_FOUND - true if system has gfortran
|
||||||
|
# LIBGFORTRAN_LIBRARIES - path to libgfortran
|
||||||
|
# LIBQUADMATH_LIBRARIES - path to libquadmath
|
||||||
|
# GFORTRAN_LIBRARIES_DIR - directory containing libgfortran, libquadmath
|
||||||
|
# GFORTRAN_INCLUDE_DIR - directory containing gfortran/gcc headers
|
||||||
|
# LIBGOMP_LIBRARIES - path to libgomp
|
||||||
|
# LIBGOMP_INCLUDE_DIR - directory containing omp.h header
|
||||||
|
# GFORTRAN_VERSION_STRING - version of gfortran found
|
||||||
|
#
|
||||||
|
# Respect find_package(GFortranLibs QUIET): only the initial banner is gated on
# this flag; the remaining status messages are always printed.
set(CMAKE_REQUIRED_QUIET ${GFortranLibs_FIND_QUIETLY})

if(NOT CMAKE_REQUIRED_QUIET)
  message(STATUS "Looking for gfortran related libraries...")
endif()

enable_language(Fortran)
if(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")

  # Basically, call "gfortran -v" to dump compiler info to the string
  # GFORTRAN_VERBOSE_STR, which will be used to get necessary paths.
  # The banner is captured through ERROR_VARIABLE.
  message(STATUS "Extracting library and header information by calling 'gfortran -v'...")
  execute_process(COMMAND "${CMAKE_Fortran_COMPILER}" "-v"
                  ERROR_VARIABLE GFORTRAN_VERBOSE_STR
                  RESULT_VARIABLE FLAG)

  # For debugging
  message(STATUS "'gfortran -v' returned:")
  message(STATUS "${GFORTRAN_VERBOSE_STR}")

  # Detect gfortran version
  string(REGEX MATCH "gcc version [^\t\n ]+" GFORTRAN_VER_STR "${GFORTRAN_VERBOSE_STR}")
  string(REGEX REPLACE "gcc version ([^\t\n ]+)" "\\1" GFORTRAN_VERSION_STRING "${GFORTRAN_VER_STR}")
  message(STATUS "Detected gfortran version ${GFORTRAN_VERSION_STRING}")
  unset(GFORTRAN_VER_STR)

  set(MATCH_REGEX "[^\t\n ]+[\t\n ]+")
  set(REPLACE_REGEX "([^\t\n ]+)")

  # Find architecture for compiler
  string(REGEX MATCH "Target: [^\t\n ]+"
         GFORTRAN_ARCH_STR "${GFORTRAN_VERBOSE_STR}")
  message(STATUS "Architecture string: ${GFORTRAN_ARCH_STR}")
  string(REGEX REPLACE "Target: ([^\t\n ]+)" "\\1"
         GFORTRAN_ARCH "${GFORTRAN_ARCH_STR}")
  message(STATUS "Detected gfortran architecture: ${GFORTRAN_ARCH}")
  unset(GFORTRAN_ARCH_STR)

  # Find install prefix, if it exists; if not, use default.
  # The match deliberately stops at the first whitespace so that no trailing
  # blank ends up inside the directory name.
  string(REGEX MATCH "--prefix=[^\t\n ]+"
         GFORTRAN_PREFIX_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_PREFIX_STR)
    message(STATUS "Detected default gfortran prefix")
    set(GFORTRAN_PREFIX_DIR "/usr/local") # default prefix for gcc install
  else()
    string(REGEX REPLACE "--prefix=([^\t\n ]+)" "\\1"
           GFORTRAN_PREFIX_DIR "${GFORTRAN_PREFIX_STR}")
  endif()
  message(STATUS "Detected gfortran prefix: ${GFORTRAN_PREFIX_DIR}")
  unset(GFORTRAN_PREFIX_STR)

  # Find install exec-prefix, if it exists; if not, use default.
  # string(REGEX MATCH <regex> <out-var> <input>): the output variable must
  # directly follow the expression.
  string(REGEX MATCH "--exec-prefix=[^\t\n ]+"
         GFORTRAN_EXEC_PREFIX_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_EXEC_PREFIX_STR)
    message(STATUS "Detected default gfortran exec-prefix")
    set(GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_PREFIX_DIR}")
  else()
    string(REGEX REPLACE "--exec-prefix=([^\t\n ]+)" "\\1"
           GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_EXEC_PREFIX_STR}")
  endif()
  message(STATUS "Detected gfortran exec-prefix: ${GFORTRAN_EXEC_PREFIX_DIR}")
  unset(GFORTRAN_EXEC_PREFIX_STR)

  # Find library directory and include directory, if library directory specified
  string(REGEX MATCH "--libdir=[^\t\n ]+"
         GFORTRAN_LIB_DIR_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_LIB_DIR_STR)
    message(STATUS "Found --libdir flag -- not found")
    message(STATUS "Using default gfortran library & include directory paths")
    set(GFORTRAN_LIBRARIES_DIR
        "${GFORTRAN_EXEC_PREFIX_DIR}/lib/gcc/${GFORTRAN_ARCH}/${GFORTRAN_VERSION_STRING}")
    string(CONCAT GFORTRAN_INCLUDE_DIR "${GFORTRAN_LIBRARIES_DIR}" "/include")
  else()
    message(STATUS "Found --libdir flag -- yes")
    string(REGEX REPLACE "--libdir=([^\t\n ]+)" "\\1"
           GFORTRAN_LIBRARIES_DIR "${GFORTRAN_LIB_DIR_STR}")
    string(CONCAT GFORTRAN_INCLUDE_DIR "${GFORTRAN_LIBRARIES_DIR}" "/gcc/" "${GFORTRAN_ARCH}" "/" "${GFORTRAN_VERSION_STRING}" "/include")
  endif()
  message(STATUS "gfortran libraries path: ${GFORTRAN_LIBRARIES_DIR}")
  message(STATUS "gfortran include path dir: ${GFORTRAN_INCLUDE_DIR}")
  unset(GFORTRAN_LIB_DIR_STR)

  # There are lots of other build options for gcc & gfortran. For now, the
  # options implemented above should cover a lot of common use cases.

  # Clean up by deleting the output string from "gfortran -v"
  unset(GFORTRAN_VERBOSE_STR)

  # Find paths for libgfortran, libquadmath, libgomp
  # libgomp needed for OpenMP support without Clang
  find_library(LIBGFORTRAN_LIBRARIES NAMES gfortran libgfortran
               HINTS ${GFORTRAN_LIBRARIES_DIR})
  find_library(LIBQUADMATH_LIBRARIES NAMES quadmath libquadmath
               HINTS ${GFORTRAN_LIBRARIES_DIR})
  find_library(LIBGOMP_LIBRARIES NAMES gomp libgomp
               HINTS ${GFORTRAN_LIBRARIES_DIR})

  # Find OpenMP headers
  find_path(LIBGOMP_INCLUDE_DIR NAMES omp.h HINTS ${GFORTRAN_INCLUDE_DIR})

else()
  message(STATUS "CMAKE_Fortran_COMPILER_ID does not match 'GNU'!")
endif()

include(FindPackageHandleStandardArgs)

# Required: libgfortran, libquadmath, path for gfortran libraries
# Optional: libgomp, path for OpenMP headers, path for gcc/gfortran headers
find_package_handle_standard_args(GFortranLibs
  REQUIRED_VARS LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES GFORTRAN_LIBRARIES_DIR
  VERSION_VAR GFORTRAN_VERSION_STRING)

if(GFORTRANLIBS_FOUND)
  message(STATUS "Looking for gfortran libraries -- found")
  message(STATUS "gfortran version: ${GFORTRAN_VERSION_STRING}")
else()
  message(STATUS "Looking for gfortran libraries -- not found")
endif()

mark_as_advanced(LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES
                 LIBGOMP_LIBRARIES LIBGOMP_INCLUDE_DIR
                 GFORTRAN_LIBRARIES_DIR GFORTRAN_INCLUDE_DIR)
|
||||||
|
# FindGFortranLIBS.cmake ends here
|
@ -0,0 +1,58 @@
|
|||||||
|
# Builds OpenBLAS v0.3.10 as an external project and exposes it as the imported
# static library target `openblas`. Expects `fc_patch` to be set by the includer
# and ExternalProject to be already included.
include(FetchContent)

set(OpenBLAS_SOURCE_DIR ${fc_patch}/OpenBLAS-src)
set(OpenBLAS_PREFIX ${fc_patch}/OpenBLAS-prefix)

# ######################################################################################################################
# OPENBLAS https://github.com/lattice/quda/blob/develop/CMakeLists.txt#L575
# ######################################################################################################################
enable_language(Fortran)

include(FortranCInterface)

#TODO: switch to CPM
include(GNUInstallDirs)

# INSTALL_DIR defaults to PREFIX, so the archive location is known up front.
# Declaring it as a byproduct lets generators that track outputs strictly
# (e.g. Ninja) know which step produces the imported library file.
set(OpenBLAS_STATIC_LIBRARY ${OpenBLAS_PREFIX}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a)

ExternalProject_Add(
  OPENBLAS
  GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git
  GIT_TAG v0.3.10
  GIT_SHALLOW YES
  PREFIX ${OpenBLAS_PREFIX}
  SOURCE_DIR ${OpenBLAS_SOURCE_DIR}
  # Pin the sub-build's lib/include directories to the ones assumed below, so
  # that both builds agree on e.g. lib vs lib64.
  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
             -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}
             -DCMAKE_INSTALL_INCLUDEDIR=${CMAKE_INSTALL_INCLUDEDIR}
  CMAKE_GENERATOR "Unix Makefiles"
  BUILD_BYPRODUCTS ${OpenBLAS_STATIC_LIBRARY})

# https://cmake.org/cmake/help/latest/module/ExternalProject.html?highlight=externalproject_get_property#external-project-definition
ExternalProject_Get_Property(OPENBLAS INSTALL_DIR)
set(OpenBLAS_INSTALL_PREFIX ${INSTALL_DIR})
add_library(openblas STATIC IMPORTED)
# Anything linking `openblas` waits for the external project to finish.
add_dependencies(openblas OPENBLAS)
set_target_properties(openblas PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES Fortran)
# ${CMAKE_INSTALL_LIBDIR} lib
set_target_properties(openblas PROPERTIES IMPORTED_LOCATION ${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a)

# https://cmake.org/cmake/help/latest/command/install.html?highlight=cmake_install_libdir#installing-targets
# ${CMAKE_INSTALL_LIBDIR} lib
# ${CMAKE_INSTALL_INCLUDEDIR} include
link_directories(${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
include_directories(${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/openblas)
|
@ -0,0 +1,9 @@
|
|||||||
|
# Downloads the pybind11 v2.9.0 release archive (checksum-verified), adds it to
# the build, and puts its headers on the directory-wide include path.
include(FetchContent)

FetchContent_Declare(pybind
  URL      https://github.com/pybind/pybind11/archive/refs/tags/v2.9.0.zip
  URL_HASH SHA256=1c6e0141f7092867c5bf388bc3acdb2689ed49f59c3977651394c6c87ae88232)

FetchContent_MakeAvailable(pybind)

include_directories("${pybind_SOURCE_DIR}/include")
|
||||||
|
|
@ -0,0 +1,15 @@
|
|||||||
|
|
||||||
|
# Builds the fbank wrapper twice: once as the plain library `kaldi_feature`
# and once as the pybind11 extension module `kaldi_frontend`.
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

include_directories("${CMAKE_CURRENT_SOURCE_DIR}")

# Plain library flavour of the wrapper sources.
add_library(kaldi_feature kaldi_feature.cc kaldi_feature_wrapper.cc)
# PUBLIC spells out the transitive behaviour of the keyword-less signature.
target_link_libraries(kaldi_feature PUBLIC kaldi-fbank)

# Python extension module built from the same sources.
pybind11_add_module(kaldi_frontend
  kaldi_feature.cc
  kaldi_feature_wrapper.cc)
target_link_libraries(kaldi_frontend PRIVATE kaldi_feature)
|
@ -0,0 +1,52 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "feat/feature-window.h"
|
||||||
|
#include <pybind11/pybind11.h>
|
||||||
|
#include <pybind11/numpy.h>
|
||||||
|
|
||||||
|
namespace paddleaudio {
|
||||||
|
|
||||||
|
namespace py = pybind11;
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
class StreamingFeatureTpl {
|
||||||
|
public:
|
||||||
|
typedef typename F::Options Options;
|
||||||
|
StreamingFeatureTpl(const Options& opts);
|
||||||
|
bool ComputeFeature(const kaldi::VectorBase<kaldi::BaseFloat>& wav,
|
||||||
|
kaldi::Vector<kaldi::BaseFloat>* feats);
|
||||||
|
void Reset() {
|
||||||
|
remained_wav_.Resize(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int Dim() {
|
||||||
|
return computer_.Dim();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves,
|
||||||
|
kaldi::Vector<kaldi::BaseFloat>* feats);
|
||||||
|
Options opts_;
|
||||||
|
kaldi::FeatureWindowFunction window_function_;
|
||||||
|
kaldi::Vector<kaldi::BaseFloat> remained_wav_;
|
||||||
|
F computer_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace ppspeech
|
||||||
|
|
||||||
|
#include "feature_common_inl.h"
|
||||||
|
|
@ -0,0 +1,92 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "base/kaldi-common.h"
|
||||||
|
|
||||||
|
namespace paddleaudio {
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
StreamingFeatureTpl<F>::StreamingFeatureTpl(
|
||||||
|
const Options& opts)
|
||||||
|
: opts_(opts), computer_(opts),
|
||||||
|
window_function_(opts.frame_opts) {
|
||||||
|
//window_function_(computer_.GetFrameOptions()) { the opt set to zero
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
bool StreamingFeatureTpl<F>::ComputeFeature(const kaldi::VectorBase<kaldi::BaseFloat>& wav,
|
||||||
|
kaldi::Vector<kaldi::BaseFloat>* feats) {
|
||||||
|
// append remaned waves
|
||||||
|
kaldi::int32 wav_len = wav.Dim();
|
||||||
|
if (wav_len == 0) return false;
|
||||||
|
kaldi::int32 left_len = remained_wav_.Dim();
|
||||||
|
kaldi::Vector<kaldi::BaseFloat> waves(left_len + wav_len);
|
||||||
|
waves.Range(0, left_len).CopyFromVec(remained_wav_);
|
||||||
|
waves.Range(left_len, wav_len).CopyFromVec(wav);
|
||||||
|
|
||||||
|
// cache remaned waves
|
||||||
|
kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions();
|
||||||
|
kaldi::int32 num_frames = kaldi::NumFrames(waves.Dim(), frame_opts);
|
||||||
|
kaldi::int32 frame_shift = frame_opts.WindowShift();
|
||||||
|
kaldi::int32 left_samples = waves.Dim() - frame_shift * num_frames;
|
||||||
|
remained_wav_.Resize(left_samples);
|
||||||
|
remained_wav_.CopyFromVec(
|
||||||
|
waves.Range(frame_shift * num_frames, left_samples));
|
||||||
|
|
||||||
|
// compute speech feature
|
||||||
|
Compute(waves, feats);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute feat
|
||||||
|
template <class F>
|
||||||
|
bool StreamingFeatureTpl<F>::Compute(
|
||||||
|
const kaldi::Vector<kaldi::BaseFloat>& waves,
|
||||||
|
kaldi::Vector<kaldi::BaseFloat>* feats) {
|
||||||
|
kaldi::BaseFloat vtln_warp = 1.0;
|
||||||
|
const kaldi::FrameExtractionOptions& frame_opts =
|
||||||
|
computer_.GetFrameOptions();
|
||||||
|
kaldi::int32 num_samples = waves.Dim();
|
||||||
|
kaldi::int32 frame_length = frame_opts.WindowSize();
|
||||||
|
kaldi::int32 sample_rate = frame_opts.samp_freq;
|
||||||
|
if (num_samples < frame_length) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
kaldi::int32 num_frames = kaldi::NumFrames(num_samples, frame_opts);
|
||||||
|
feats->Resize(num_frames * Dim());
|
||||||
|
|
||||||
|
kaldi::Vector<kaldi::BaseFloat> window;
|
||||||
|
bool need_raw_log_energy = computer_.NeedRawLogEnergy();
|
||||||
|
for (kaldi::int32 frame = 0; frame < num_frames; frame++) {
|
||||||
|
kaldi::BaseFloat raw_log_energy = 0.0;
|
||||||
|
kaldi::ExtractWindow(0,
|
||||||
|
waves,
|
||||||
|
frame,
|
||||||
|
frame_opts,
|
||||||
|
window_function_,
|
||||||
|
&window,
|
||||||
|
need_raw_log_energy ? &raw_log_energy : NULL);
|
||||||
|
|
||||||
|
kaldi::Vector<kaldi::BaseFloat> this_feature(computer_.Dim(),
|
||||||
|
kaldi::kUndefined);
|
||||||
|
computer_.Compute(raw_log_energy, vtln_warp, &window, &this_feature);
|
||||||
|
kaldi::SubVector<kaldi::BaseFloat> output_row(
|
||||||
|
feats->Data() + frame * Dim(), Dim());
|
||||||
|
output_row.CopyFromVec(this_feature);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace paddleaudio
|
@ -0,0 +1,143 @@
|
|||||||
|
|
||||||
|
#include <pybind11/pybind11.h>
|
||||||
|
#include <pybind11/numpy.h>
|
||||||
|
|
||||||
|
#include "kaldi_feature_wrapper.h"
|
||||||
|
|
||||||
|
namespace py=pybind11;
|
||||||
|
|
||||||
|
bool InitFbank(
|
||||||
|
float samp_freq, // frame opts
|
||||||
|
float frame_shift_ms,
|
||||||
|
float frame_length_ms,
|
||||||
|
float dither,
|
||||||
|
float preemph_coeff,
|
||||||
|
bool remove_dc_offset,
|
||||||
|
std::string window_type, // e.g. Hamming window
|
||||||
|
bool round_to_power_of_two,
|
||||||
|
float blackman_coeff,
|
||||||
|
bool snip_edges,
|
||||||
|
bool allow_downsample,
|
||||||
|
bool allow_upsample,
|
||||||
|
int max_feature_vectors,
|
||||||
|
int num_bins, // mel opts
|
||||||
|
float low_freq,
|
||||||
|
float high_freq,
|
||||||
|
float vtln_low,
|
||||||
|
float vtln_high,
|
||||||
|
bool debug_mel,
|
||||||
|
bool htk_mode,
|
||||||
|
bool use_energy, // fbank opts
|
||||||
|
float energy_floor,
|
||||||
|
bool raw_energy,
|
||||||
|
bool htk_compat,
|
||||||
|
bool use_log_fbank,
|
||||||
|
bool use_power) {
|
||||||
|
kaldi::FbankOptions opts;
|
||||||
|
opts.frame_opts.samp_freq = samp_freq; // frame opts
|
||||||
|
opts.frame_opts.frame_shift_ms = frame_shift_ms;
|
||||||
|
opts.frame_opts.frame_length_ms = frame_length_ms;
|
||||||
|
opts.frame_opts.dither = dither;
|
||||||
|
opts.frame_opts.preemph_coeff = preemph_coeff;
|
||||||
|
opts.frame_opts.remove_dc_offset = remove_dc_offset;
|
||||||
|
opts.frame_opts.window_type = window_type;
|
||||||
|
opts.frame_opts.round_to_power_of_two = round_to_power_of_two;
|
||||||
|
opts.frame_opts.blackman_coeff = blackman_coeff;
|
||||||
|
opts.frame_opts.snip_edges = snip_edges;
|
||||||
|
opts.frame_opts.allow_downsample = allow_downsample;
|
||||||
|
opts.frame_opts.allow_upsample = allow_upsample;
|
||||||
|
opts.frame_opts.max_feature_vectors = max_feature_vectors;
|
||||||
|
|
||||||
|
opts.mel_opts.num_bins = num_bins; // mel opts
|
||||||
|
opts.mel_opts.low_freq = low_freq;
|
||||||
|
opts.mel_opts.high_freq = high_freq;
|
||||||
|
opts.mel_opts.vtln_low = vtln_low;
|
||||||
|
opts.mel_opts.vtln_high = vtln_high;
|
||||||
|
opts.mel_opts.debug_mel = debug_mel;
|
||||||
|
opts.mel_opts.htk_mode = htk_mode;
|
||||||
|
|
||||||
|
opts.use_energy = use_energy; // fbank opts
|
||||||
|
opts.energy_floor = energy_floor;
|
||||||
|
opts.raw_energy = raw_energy;
|
||||||
|
opts.htk_compat = htk_compat;
|
||||||
|
opts.use_log_fbank = use_log_fbank;
|
||||||
|
opts.use_power = use_power;
|
||||||
|
paddleaudio::KaldiFeatureWrapper::GetInstance()->InitFbank(opts);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Feeds one chunk of samples to the shared, previously initialised fbank
// extractor and returns whatever features it produced.
py::array_t<double> ComputeFbankStreaming(const py::array_t<double>& wav) {
    paddleaudio::KaldiFeatureWrapper* wrapper =
        paddleaudio::KaldiFeatureWrapper::GetInstance();
    return wrapper->ComputeFbank(wav);
}
|
||||||
|
|
||||||
|
// One-shot fbank computation: (re)initialises the shared extractor with the
// given options, runs it on `wav`, then clears the extractor's cached samples.
// Because the shared extractor is re-initialised, any streaming session in
// progress on it is replaced.
py::array_t<double> ComputeFbank(float samp_freq,  // frame opts
                                 float frame_shift_ms,
                                 float frame_length_ms,
                                 float dither,
                                 float preemph_coeff,
                                 bool remove_dc_offset,
                                 std::string window_type,  // e.g. Hamming window
                                 bool round_to_power_of_two,
                                 float blackman_coeff,
                                 bool snip_edges,
                                 bool allow_downsample,
                                 bool allow_upsample,
                                 int max_feature_vectors,
                                 int num_bins,  // mel opts
                                 float low_freq,
                                 float high_freq,
                                 float vtln_low,
                                 float vtln_high,
                                 bool debug_mel,
                                 bool htk_mode,
                                 bool use_energy,  // fbank opts
                                 float energy_floor,
                                 bool raw_energy,
                                 bool htk_compat,
                                 bool use_log_fbank,
                                 bool use_power,
                                 const py::array_t<double>& wav) {
    // Step 1: configure the shared extractor.
    InitFbank(samp_freq, frame_shift_ms, frame_length_ms, dither,
              preemph_coeff, remove_dc_offset, window_type,
              round_to_power_of_two, blackman_coeff, snip_edges,
              allow_downsample, allow_upsample, max_feature_vectors,
              num_bins, low_freq, high_freq, vtln_low, vtln_high,
              debug_mel, htk_mode,
              use_energy, energy_floor, raw_energy, htk_compat,
              use_log_fbank, use_power);

    // Step 2: extract features for the whole signal.
    py::array_t<double> features = ComputeFbankStreaming(wav);

    // Step 3: drop the leftover samples so the next call starts clean.
    paddleaudio::KaldiFeatureWrapper::GetInstance()->ResetFbank();
    return features;
}
|
||||||
|
|
||||||
|
|
||||||
|
void ResetFbank() {
|
||||||
|
paddleaudio::KaldiFeatureWrapper::GetInstance()->ResetFbank();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Python module definition.
// The name given here determines the exported init symbol, so it has to match
// the file name of the built extension. The build creates the module with
// pybind11_add_module(kaldi_frontend ...), hence `kaldi_frontend`.
PYBIND11_MODULE(kaldi_frontend, m) {
    m.doc() = "kaldi_feature example";
    m.def("InitFbank", &InitFbank, "init fbank");
    m.def("ResetFbank", &ResetFbank, "reset fbank");
    m.def("ComputeFbank", &ComputeFbank, "compute fbank");
    m.def("ComputeFbankStreaming", &ComputeFbankStreaming, "compute fbank streaming");
}
|
@ -0,0 +1,71 @@
|
|||||||
|
#include <pybind11/pybind11.h>
|
||||||
|
#include <pybind11/numpy.h>
|
||||||
|
|
||||||
|
#include "kaldi_feature_wrapper.h"
|
||||||
|
|
||||||
|
namespace py=pybind11;
|
||||||
|
|
||||||
|
bool InitFbank(
|
||||||
|
float samp_freq, // frame opts
|
||||||
|
float frame_shift_ms,
|
||||||
|
float frame_length_ms,
|
||||||
|
float dither,
|
||||||
|
float preemph_coeff,
|
||||||
|
bool remove_dc_offset,
|
||||||
|
std::string window_type, // e.g. Hamming window
|
||||||
|
bool round_to_power_of_two,
|
||||||
|
float blackman_coeff,
|
||||||
|
bool snip_edges,
|
||||||
|
bool allow_downsample,
|
||||||
|
bool allow_upsample,
|
||||||
|
int max_feature_vectors,
|
||||||
|
int num_bins, // mel opts
|
||||||
|
float low_freq,
|
||||||
|
float high_freq,
|
||||||
|
float vtln_low,
|
||||||
|
float vtln_high,
|
||||||
|
bool debug_mel,
|
||||||
|
bool htk_mode,
|
||||||
|
bool use_energy, // fbank opts
|
||||||
|
float energy_floor,
|
||||||
|
bool raw_energy,
|
||||||
|
bool htk_compat,
|
||||||
|
bool use_log_fbank,
|
||||||
|
bool use_power);
|
||||||
|
|
||||||
|
py::array_t<double> ComputeFbank(
|
||||||
|
float samp_freq, // frame opts
|
||||||
|
float frame_shift_ms,
|
||||||
|
float frame_length_ms,
|
||||||
|
float dither,
|
||||||
|
float preemph_coeff,
|
||||||
|
bool remove_dc_offset,
|
||||||
|
std::string window_type, // e.g. Hamming window
|
||||||
|
bool round_to_power_of_two,
|
||||||
|
kaldi::BaseFloat blackman_coeff,
|
||||||
|
bool snip_edges,
|
||||||
|
bool allow_downsample,
|
||||||
|
bool allow_upsample,
|
||||||
|
int max_feature_vectors,
|
||||||
|
int num_bins, // mel opts
|
||||||
|
float low_freq,
|
||||||
|
float high_freq,
|
||||||
|
float vtln_low,
|
||||||
|
float vtln_high,
|
||||||
|
bool debug_mel,
|
||||||
|
bool htk_mode,
|
||||||
|
bool use_energy, // fbank opts
|
||||||
|
float energy_floor,
|
||||||
|
bool raw_energy,
|
||||||
|
bool htk_compat,
|
||||||
|
bool use_log_fbank,
|
||||||
|
bool use_power,
|
||||||
|
const py::array_t<double>& wav);
|
||||||
|
|
||||||
|
py::array_t<double> ComputeFbankStreaming(const py::array_t<double>& wav);
|
||||||
|
|
||||||
|
void ResetFbank();
|
||||||
|
|
||||||
|
py::array_t<double> ComputeFbankStreaming(const py::array_t<double>& wav);
|
||||||
|
|
||||||
|
py::array_t<double> TestFun(const py::array_t<double>& wav);
|
@ -0,0 +1,57 @@
|
|||||||
|
#include "kaldi_feature_wrapper.h"
|
||||||
|
|
||||||
|
namespace paddleaudio {
|
||||||
|
|
||||||
|
// Returns the single shared wrapper, held in a function-local static.
KaldiFeatureWrapper* KaldiFeatureWrapper::GetInstance() {
    static KaldiFeatureWrapper singleton;
    KaldiFeatureWrapper* handle = &singleton;
    return handle;
}
|
||||||
|
|
||||||
|
// Replaces the current fbank extractor (if any) with a new one built from
// `opts`. Always returns true.
bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) {
    Fbank* extractor = new Fbank(opts);
    fbank_.reset(extractor);  // takes ownership; the previous extractor is destroyed
    return true;
}
|
||||||
|
|
||||||
|
// Runs the streaming fbank extractor on one chunk of samples.
// Returns a 2-D array of shape (num_frames, Dim()). An empty array is returned
// when the extractor has not been initialised, or when no feature could be
// computed from the samples received so far.
// NOTE(review): the input is read as info.size consecutive doubles, i.e. it is
// assumed to be contiguous -- confirm callers never pass strided views.
py::array_t<double> KaldiFeatureWrapper::ComputeFbank(const py::array_t<double> wav) {
    // InitFbank() has not been called yet: there is nothing to compute with.
    if (!fbank_) return py::array_t<double>();

    // Convert the incoming doubles to Kaldi's BaseFloat vector.
    py::buffer_info info = wav.request();
    kaldi::Vector<kaldi::BaseFloat> input_wav(info.size);
    const double* wav_ptr = static_cast<const double*>(info.ptr);
    for (int idx = 0; idx < info.size; ++idx) {
        input_wav(idx) = wav_ptr[idx];
    }

    kaldi::Vector<kaldi::BaseFloat> feats;
    bool flag = fbank_->ComputeFeature(input_wav, &feats);
    if (flag == false || feats.Dim() == 0) return py::array_t<double>();

    // Copy the flat feature vector into a fresh numpy buffer.
    auto result = py::array_t<double>(feats.Dim());
    py::buffer_info xs = result.request();
    double* res_ptr = static_cast<double*>(xs.ptr);
    for (int idx = 0; idx < feats.Dim(); ++idx) {
        res_ptr[idx] = feats(idx);
    }

    // One row per frame.
    return result.reshape({ feats.Dim() / Dim(), Dim()});
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace paddleaudio
|
@ -0,0 +1,28 @@
|
|||||||
|
|
||||||
|
#include "base/kaldi-common.h"
|
||||||
|
#include "feature_common.h"
|
||||||
|
#include "feat/feature-fbank.h"
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace paddleaudio {
|
||||||
|
|
||||||
|
typedef StreamingFeatureTpl<kaldi::FbankComputer> Fbank;
|
||||||
|
|
||||||
|
class KaldiFeatureWrapper {
|
||||||
|
public:
|
||||||
|
static KaldiFeatureWrapper* GetInstance();
|
||||||
|
bool InitFbank(kaldi::FbankOptions opts);
|
||||||
|
py::array_t<double> ComputeFbank(const py::array_t<double> wav);
|
||||||
|
int Dim() {
|
||||||
|
return fbank_->Dim();
|
||||||
|
}
|
||||||
|
void ResetFbank() {
|
||||||
|
fbank_->Reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::unique_ptr<paddleaudio::Fbank> fbank_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace paddleaudio
|
@ -0,0 +1,60 @@
|
|||||||
|
# Builds the subset of Kaldi needed for feature extraction (base, util, matrix,
# feat) as separate libraries.
project(kaldi)

add_library(kaldi-base
  base/io-funcs.cc
  base/kaldi-error.cc
  base/kaldi-math.cc
  base/kaldi-utils.cc
  base/timer.cc
)

add_library(kaldi-util
  util/kaldi-holder.cc
  util/kaldi-io.cc
  util/kaldi-semaphore.cc
  util/kaldi-table.cc
  util/kaldi-thread.cc
  util/parse-options.cc
  util/simple-io-funcs.cc
  util/simple-options.cc
  util/text-utils.cc
)
target_link_libraries(kaldi-util PUBLIC kaldi-base kaldi-matrix)

add_library(kaldi-mfcc
  feat/feature-mfcc.cc
)
target_link_libraries(kaldi-mfcc PUBLIC kaldi-feat-common)

add_library(kaldi-fbank
  feat/feature-fbank.cc
)
target_link_libraries(kaldi-fbank PUBLIC kaldi-feat-common)

add_library(kaldi-feat-common
  feat/wave-reader.cc
  feat/signal.cc
  feat/feature-functions.cc
  feat/feature-window.cc
  feat/resample.cc
  feat/mel-computations.cc
  feat/cmvn.cc
)
target_link_libraries(kaldi-feat-common PUBLIC kaldi-base kaldi-matrix kaldi-util)

add_library(kaldi-matrix
  matrix/compressed-matrix.cc
  matrix/kaldi-matrix.cc
  matrix/kaldi-vector.cc
  matrix/matrix-functions.cc
  matrix/optimization.cc
  matrix/packed-matrix.cc
  matrix/qr.cc
  matrix/sparse-matrix.cc
  matrix/sp-matrix.cc
  matrix/srfft.cc
  matrix/tp-matrix.cc
)
# Prefer the imported `openblas` target when the parent project defines it:
# linking it also orders this library after the OpenBLAS external build, whose
# installed headers and archive are needed here. Fall back to the bare archive
# name (resolved through the directory-level link path) otherwise.
if(TARGET openblas)
  target_link_libraries(kaldi-matrix PUBLIC gfortran kaldi-base openblas)
else()
  target_link_libraries(kaldi-matrix PUBLIC gfortran kaldi-base libopenblas.a)
endif()
|
||||||
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../../speechx/speechx/kaldi/base
|
@ -0,0 +1 @@
|
|||||||
|
../../../../speechx/speechx/kaldi/feat
|
@ -0,0 +1 @@
|
|||||||
|
../../../../speechx/speechx/kaldi/matrix
|
@ -0,0 +1 @@
|
|||||||
|
../../../../speechx/speechx/kaldi/util
|
Loading…
Reference in new issue