Merge pull request #2051 from SmileGoat/audio_dev
[audio] add kaldi fbank to paddleaudiopull/2088/head
commit
e17b7542f1
@ -0,0 +1 @@
|
||||
fc_patch/
|
@ -0,0 +1,39 @@
|
||||
# Top-level build script for the paddleaudio native extension.
# It configures global compiler settings, pulls in the OpenBLAS and pybind11
# helper modules from ./cmake, and then descends into third_party/ and csrc/.
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)

project(paddleaudio VERSION 0.1)

# Index of a user-supplied "-std=c++" inside CMAKE_CXX_FLAGS (-1 when absent).
# NOTE(review): nothing visible in this build reads env_cxx_standard yet.
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)

# cmake dir
set(paddleaudio_cmake_dir ${PROJECT_SOURCE_DIR}/cmake)

# Modules
list(APPEND CMAKE_MODULE_PATH ${paddleaudio_cmake_dir}/external)
list(APPEND CMAKE_MODULE_PATH ${paddleaudio_cmake_dir})
include(FetchContent)
include(ExternalProject)

# fc_patch dir: download/build area shared by FetchContent and ExternalProject.
# Resolved against PROJECT_SOURCE_DIR so the location stays inside this project
# even when it is added as a sub-project of a larger build.
set(FETCHCONTENT_QUIET off)
get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${PROJECT_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_patch})

# NOTE(review): -O0 -g is appended for every configuration, so builds are
# never optimized -- confirm this is intended outside of development.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -fPIC -O0 -Wall -g")
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_C_STANDARD 11)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
option(BUILD_SOX "Build libsox statically" ON)

# Directory holding Python.h. The default keeps the previously hard-coded
# location; override with -DPADDLEAUDIO_PYTHON_INCLUDE_DIR=<dir> for any other
# interpreter version or install prefix.
set(PADDLEAUDIO_PYTHON_INCLUDE_DIR "/usr/include/python3.7m"
    CACHE PATH "Directory containing the Python C API headers (Python.h)")

# checkout the thirdparty/kaldi/base/kaldi-types.h
# compile kaldi without openfst
add_definitions("-DCOMPILE_WITHOUT_OPENFST")

include(openblas)
include(pybind)

include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/third_party/kaldi)
include_directories("${PADDLEAUDIO_PYTHON_INCLUDE_DIR}")
add_subdirectory(third_party)
add_subdirectory(csrc)
|
@ -0,0 +1,145 @@
|
||||
#.rst:
|
||||
# FindGFortranLibs
|
||||
# --------
|
||||
# https://github.com/Argonne-National-Laboratory/PIPS/blob/master/cmake/Modules/FindGFortranLibs.cmake
|
||||
# https://enccs.github.io/cmake-workshop/cxx-fortran/
|
||||
#
|
||||
# Find gcc Fortran compiler & library paths
|
||||
#
|
||||
# The module defines the following variables:
|
||||
#
|
||||
# ::
|
||||
#
|
||||
#
|
||||
# GFORTRANLIBS_FOUND - true if system has gfortran
|
||||
# LIBGFORTRAN_LIBRARIES - path to libgfortran
|
||||
# LIBQUADMATH_LIBRARIES - path to libquadmath
|
||||
# GFORTRAN_LIBRARIES_DIR - directory containing libgfortran, libquadmath
|
||||
# GFORTRAN_INCLUDE_DIR - directory containing gfortran/gcc headers
|
||||
# LIBGOMP_LIBRARIES - path to libgomp
|
||||
# LIBGOMP_INCLUDE_DIR - directory containing omp.h header
|
||||
# GFORTRAN_VERSION_STRING - version of gfortran found
|
||||
#
|
||||
# Stay quiet when the caller asked for it via find_package(GFortranLibs QUIET).
# LIBIOMP_FIND_QUIETLY is still honoured because that was the (mis-named)
# switch this module used to read.
if(GFortranLibs_FIND_QUIETLY OR LIBIOMP_FIND_QUIETLY)
  set(CMAKE_REQUIRED_QUIET TRUE)
else()
  set(CMAKE_REQUIRED_QUIET FALSE)
endif()

if(NOT CMAKE_REQUIRED_QUIET)
  message(STATUS "Looking for gfortran related libraries...")
endif()
|
||||
|
||||
# Probe the GNU Fortran compiler: parse the output of "gfortran -v" to derive
# the version, target triplet, install prefixes and library/include
# directories, then locate libgfortran, libquadmath, libgomp and omp.h.
enable_language(Fortran)
if(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")

  # Basically, call "gfortran -v" to dump compiler info to the string
  # GFORTRAN_VERBOSE_STR, which will be used to get necessary paths
  # (the text is captured from stderr via ERROR_VARIABLE).
  message(STATUS "Extracting library and header information by calling 'gfortran -v'...")
  execute_process(COMMAND "${CMAKE_Fortran_COMPILER}" "-v"
                  ERROR_VARIABLE GFORTRAN_VERBOSE_STR
                  RESULT_VARIABLE FLAG)

  # For debugging
  message(STATUS "'gfortran -v' returned:")
  message(STATUS "${GFORTRAN_VERBOSE_STR}")

  # Detect gfortran version
  string(REGEX MATCH "gcc version [^\t\n ]+" GFORTRAN_VER_STR "${GFORTRAN_VERBOSE_STR}")
  string(REGEX REPLACE "gcc version ([^\t\n ]+)" "\\1" GFORTRAN_VERSION_STRING "${GFORTRAN_VER_STR}")
  message(STATUS "Detected gfortran version ${GFORTRAN_VERSION_STRING}")
  unset(GFORTRAN_VER_STR)

  set(MATCH_REGEX "[^\t\n ]+[\t\n ]+")
  set(REPLACE_REGEX "([^\t\n ]+)")

  # Find architecture for compiler
  string(REGEX MATCH "Target: [^\t\n ]+"
         GFORTRAN_ARCH_STR "${GFORTRAN_VERBOSE_STR}")
  message(STATUS "Architecture string: ${GFORTRAN_ARCH_STR}")
  string(REGEX REPLACE "Target: ([^\t\n ]+)" "\\1"
         GFORTRAN_ARCH "${GFORTRAN_ARCH_STR}")
  message(STATUS "Detected gfortran architecture: ${GFORTRAN_ARCH}")
  unset(GFORTRAN_ARCH_STR)

  # Find install prefix, if it exists; if not, use default.
  # The match stops at the first whitespace character so that no trailing
  # blank ends up inside GFORTRAN_PREFIX_DIR (and every path built from it).
  string(REGEX MATCH "--prefix=[^\t\n ]+"
         GFORTRAN_PREFIX_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_PREFIX_STR)
    message(STATUS "Detected default gfortran prefix")
    set(GFORTRAN_PREFIX_DIR "/usr/local") # default prefix for gcc install
  else()
    string(REGEX REPLACE "--prefix=([^\t\n ]+)" "\\1"
           GFORTRAN_PREFIX_DIR "${GFORTRAN_PREFIX_STR}")
  endif()
  message(STATUS "Detected gfortran prefix: ${GFORTRAN_PREFIX_DIR}")
  unset(GFORTRAN_PREFIX_STR)

  # Find install exec-prefix, if it exists; if not, use default.
  # string(REGEX MATCH <regex> <out-var> <input>): the output variable must
  # directly follow the regex, otherwise the flag is never detected.
  string(REGEX MATCH "--exec-prefix=[^\t\n ]+"
         GFORTRAN_EXEC_PREFIX_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_EXEC_PREFIX_STR)
    message(STATUS "Detected default gfortran exec-prefix")
    set(GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_PREFIX_DIR}")
  else()
    string(REGEX REPLACE "--exec-prefix=([^\t\n ]+)" "\\1"
           GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_EXEC_PREFIX_STR}")
  endif()
  message(STATUS "Detected gfortran exec-prefix: ${GFORTRAN_EXEC_PREFIX_DIR}")
  unset(GFORTRAN_EXEC_PREFIX_STR)

  # Find library directory and include directory, if library directory specified
  string(REGEX MATCH "--libdir=[^\t\n ]+"
         GFORTRAN_LIB_DIR_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_LIB_DIR_STR)
    message(STATUS "Found --libdir flag -- not found")
    message(STATUS "Using default gfortran library & include directory paths")
    set(GFORTRAN_LIBRARIES_DIR
        "${GFORTRAN_EXEC_PREFIX_DIR}/lib/gcc/${GFORTRAN_ARCH}/${GFORTRAN_VERSION_STRING}")
    string(CONCAT GFORTRAN_INCLUDE_DIR "${GFORTRAN_LIBRARIES_DIR}" "/include")
  else()
    message(STATUS "Found --libdir flag -- yes")
    string(REGEX REPLACE "--libdir=([^\t\n ]+)" "\\1"
           GFORTRAN_LIBRARIES_DIR "${GFORTRAN_LIB_DIR_STR}")
    string(CONCAT GFORTRAN_INCLUDE_DIR "${GFORTRAN_LIBRARIES_DIR}" "/gcc/" "${GFORTRAN_ARCH}" "/" "${GFORTRAN_VERSION_STRING}" "/include")
  endif()
  message(STATUS "gfortran libraries path: ${GFORTRAN_LIBRARIES_DIR}")
  message(STATUS "gfortran include path dir: ${GFORTRAN_INCLUDE_DIR}")
  unset(GFORTRAN_LIB_DIR_STR)

  # There are lots of other build options for gcc & gfortran. For now, the
  # options implemented above should cover a lot of common use cases.

  # Clean up by deleting the output string from "gfortran -v"
  unset(GFORTRAN_VERBOSE_STR)

  # Find paths for libgfortran, libquadmath, libgomp
  # libgomp needed for OpenMP support without Clang
  find_library(LIBGFORTRAN_LIBRARIES NAMES gfortran libgfortran
               HINTS ${GFORTRAN_LIBRARIES_DIR})
  find_library(LIBQUADMATH_LIBRARIES NAMES quadmath libquadmath
               HINTS ${GFORTRAN_LIBRARIES_DIR})
  find_library(LIBGOMP_LIBRARIES NAMES gomp libgomp
               HINTS ${GFORTRAN_LIBRARIES_DIR})

  # Find OpenMP headers
  find_path(LIBGOMP_INCLUDE_DIR NAMES omp.h HINTS ${GFORTRAN_INCLUDE_DIR})

else()
  message(STATUS "CMAKE_Fortran_COMPILER_ID does not match 'GNU'!")
endif()
|
||||
|
||||
include(FindPackageHandleStandardArgs)

# Mandatory results: libgfortran, libquadmath and the gfortran library dir.
# libgomp, the OpenMP header dir and the gcc/gfortran header dir are optional
# and therefore not listed in REQUIRED_VARS.
find_package_handle_standard_args(
  GFortranLibs
  REQUIRED_VARS
    LIBGFORTRAN_LIBRARIES
    LIBQUADMATH_LIBRARIES
    GFORTRAN_LIBRARIES_DIR
  VERSION_VAR GFORTRAN_VERSION_STRING)

# Summarise the outcome of the search.
if(NOT GFORTRANLIBS_FOUND)
  message(STATUS "Looking for gfortran libraries -- not found")
else()
  message(STATUS "Looking for gfortran libraries -- found")
  message(STATUS "gfortran version: ${GFORTRAN_VERSION_STRING}")
endif()

# Keep the cache entries out of the default cmake-gui / ccmake view.
mark_as_advanced(
  LIBGFORTRAN_LIBRARIES
  LIBQUADMATH_LIBRARIES
  LIBGOMP_LIBRARIES
  LIBGOMP_INCLUDE_DIR
  GFORTRAN_LIBRARIES_DIR
  GFORTRAN_INCLUDE_DIR)
|
||||
# FindGFortranLIBS.cmake ends here
|
@ -0,0 +1,58 @@
|
||||
# Builds OpenBLAS as an external project under ${fc_patch} and exposes the
# resulting static archive as the imported target `openblas`.
include(FetchContent)
include(ExternalProject)

set(OpenBLAS_SOURCE_DIR ${fc_patch}/OpenBLAS-src)
set(OpenBLAS_PREFIX ${fc_patch}/OpenBLAS-prefix)

# ######################################################################################################################
# OPENBLAS https://github.com/lattice/quda/blob/develop/CMakeLists.txt#L575
# ######################################################################################################################
enable_language(Fortran)

include(FortranCInterface)

# # Clang doesn't have a Fortran compiler in its suite (yet),
# # so detect libraries for gfortran; we need equivalents to
# # libgfortran and libquadmath, which are implicitly
# # linked by flags in CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES
# include(FindGFortranLibs REQUIRED)
# # Add directory containing libgfortran and libquadmath to
# # linker. Should also contain libgomp, if not using
# # Intel OpenMP runtime
# link_directories(${GFORTRAN_LIBRARIES_DIR})
# # gfortran dir in the docker.
# link_directories(/usr/local/gcc-8.2/lib64)
# # if you are working with C and Fortran
# FortranCInterface_VERIFY()

# # if you are working with C++ and Fortran
# FortranCInterface_VERIFY(CXX)


#TODO: switch to CPM
include(GNUInstallDirs)

# Absolute path of the archive the external build installs. It is declared as
# a byproduct below so generators that need an explicit producer for every
# file (e.g. Ninja) know where libopenblas.a comes from.
set(OpenBLAS_STATIC_LIB ${OpenBLAS_PREFIX}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a)

ExternalProject_Add(
  OPENBLAS
  GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git
  GIT_TAG v0.3.10
  GIT_SHALLOW YES
  PREFIX ${OpenBLAS_PREFIX}
  SOURCE_DIR ${OpenBLAS_SOURCE_DIR}
  # Same location ExternalProject would pick by default when PREFIX is given;
  # spelled out so it provably matches OpenBLAS_STATIC_LIB.
  INSTALL_DIR ${OpenBLAS_PREFIX}
  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
             # Force the sub-build to use the same lib dir name (lib / lib64)
             # that this project uses to locate the archive.
             -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}
  CMAKE_GENERATOR "Unix Makefiles"
  BUILD_BYPRODUCTS ${OpenBLAS_STATIC_LIB})


# https://cmake.org/cmake/help/latest/module/ExternalProject.html?highlight=externalproject_get_property#external-project-definition
ExternalProject_Get_Property(OPENBLAS INSTALL_DIR)
set(OpenBLAS_INSTALL_PREFIX ${INSTALL_DIR})
add_library(openblas STATIC IMPORTED)
# Anything linking `openblas` waits for the external build to finish.
add_dependencies(openblas OPENBLAS)
set_target_properties(openblas PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES Fortran)
# ${CMAKE_INSTALL_LIBDIR} lib
set_target_properties(openblas PROPERTIES IMPORTED_LOCATION ${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a)


# https://cmake.org/cmake/help/latest/command/install.html?highlight=cmake_install_libdir#installing-targets
# ${CMAKE_INSTALL_LIBDIR} lib
# ${CMAKE_INSTALL_INCLUDEDIR} include
link_directories(${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
include_directories(${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/openblas)
|
@ -0,0 +1,9 @@
|
||||
# Download pybind11 v2.9.0 (archive verified by SHA256), add it to the build,
# and put its headers on the include path of every target configured after
# this point.
include(FetchContent)

FetchContent_Declare(pybind
  URL      https://github.com/pybind/pybind11/archive/refs/tags/v2.9.0.zip
  URL_HASH SHA256=1c6e0141f7092867c5bf388bc3acdb2689ed49f59c3977651394c6c87ae88232)

FetchContent_MakeAvailable(pybind)

# pybind_SOURCE_DIR is populated by FetchContent_MakeAvailable(pybind).
include_directories("${pybind_SOURCE_DIR}/include")
|
||||
|
@ -0,0 +1,15 @@
|
||||
|
||||
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

# Library with the fbank wrapper and the binding functions.
add_library(kaldi_feature
  kaldi_feature.cc
  kaldi_feature_wrapper.cc
)
# Headers of this directory are needed by the library and by everything that
# links against it, so the path is attached to the target (PUBLIC) instead of
# being set for the whole directory.
target_include_directories(kaldi_feature PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(kaldi_feature PUBLIC kaldi-fbank)

# Python extension module; the Python import name is the target name.
pybind11_add_module(kaldi_frontend kaldi_feature.cc kaldi_feature_wrapper.cc)
target_link_libraries(kaldi_frontend PRIVATE kaldi_feature)
|
@ -0,0 +1,52 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "feat/feature-window.h"
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/numpy.h>
|
||||
|
||||
namespace paddleaudio {
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
template <class F>
|
||||
class StreamingFeatureTpl {
|
||||
public:
|
||||
typedef typename F::Options Options;
|
||||
StreamingFeatureTpl(const Options& opts);
|
||||
bool ComputeFeature(const kaldi::VectorBase<kaldi::BaseFloat>& wav,
|
||||
kaldi::Vector<kaldi::BaseFloat>* feats);
|
||||
void Reset() {
|
||||
remained_wav_.Resize(0);
|
||||
}
|
||||
|
||||
int Dim() {
|
||||
return computer_.Dim();
|
||||
}
|
||||
|
||||
private:
|
||||
bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves,
|
||||
kaldi::Vector<kaldi::BaseFloat>* feats);
|
||||
Options opts_;
|
||||
kaldi::FeatureWindowFunction window_function_;
|
||||
kaldi::Vector<kaldi::BaseFloat> remained_wav_;
|
||||
F computer_;
|
||||
};
|
||||
|
||||
} // namespace ppspeech
|
||||
|
||||
#include "feature_common_inl.h"
|
||||
|
@ -0,0 +1,92 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
|
||||
namespace paddleaudio {
|
||||
|
||||
template <class F>
|
||||
StreamingFeatureTpl<F>::StreamingFeatureTpl(
|
||||
const Options& opts)
|
||||
: opts_(opts), computer_(opts),
|
||||
window_function_(opts.frame_opts) {
|
||||
//window_function_(computer_.GetFrameOptions()) { the opt set to zero
|
||||
}
|
||||
|
||||
template <class F>
|
||||
bool StreamingFeatureTpl<F>::ComputeFeature(const kaldi::VectorBase<kaldi::BaseFloat>& wav,
|
||||
kaldi::Vector<kaldi::BaseFloat>* feats) {
|
||||
// append remaned waves
|
||||
kaldi::int32 wav_len = wav.Dim();
|
||||
if (wav_len == 0) return false;
|
||||
kaldi::int32 left_len = remained_wav_.Dim();
|
||||
kaldi::Vector<kaldi::BaseFloat> waves(left_len + wav_len);
|
||||
waves.Range(0, left_len).CopyFromVec(remained_wav_);
|
||||
waves.Range(left_len, wav_len).CopyFromVec(wav);
|
||||
|
||||
// cache remaned waves
|
||||
kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions();
|
||||
kaldi::int32 num_frames = kaldi::NumFrames(waves.Dim(), frame_opts);
|
||||
kaldi::int32 frame_shift = frame_opts.WindowShift();
|
||||
kaldi::int32 left_samples = waves.Dim() - frame_shift * num_frames;
|
||||
remained_wav_.Resize(left_samples);
|
||||
remained_wav_.CopyFromVec(
|
||||
waves.Range(frame_shift * num_frames, left_samples));
|
||||
|
||||
// compute speech feature
|
||||
Compute(waves, feats);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Compute feat
|
||||
template <class F>
|
||||
bool StreamingFeatureTpl<F>::Compute(
|
||||
const kaldi::Vector<kaldi::BaseFloat>& waves,
|
||||
kaldi::Vector<kaldi::BaseFloat>* feats) {
|
||||
kaldi::BaseFloat vtln_warp = 1.0;
|
||||
const kaldi::FrameExtractionOptions& frame_opts =
|
||||
computer_.GetFrameOptions();
|
||||
kaldi::int32 num_samples = waves.Dim();
|
||||
kaldi::int32 frame_length = frame_opts.WindowSize();
|
||||
kaldi::int32 sample_rate = frame_opts.samp_freq;
|
||||
if (num_samples < frame_length) {
|
||||
return false;
|
||||
}
|
||||
|
||||
kaldi::int32 num_frames = kaldi::NumFrames(num_samples, frame_opts);
|
||||
feats->Resize(num_frames * Dim());
|
||||
|
||||
kaldi::Vector<kaldi::BaseFloat> window;
|
||||
bool need_raw_log_energy = computer_.NeedRawLogEnergy();
|
||||
for (kaldi::int32 frame = 0; frame < num_frames; frame++) {
|
||||
kaldi::BaseFloat raw_log_energy = 0.0;
|
||||
kaldi::ExtractWindow(0,
|
||||
waves,
|
||||
frame,
|
||||
frame_opts,
|
||||
window_function_,
|
||||
&window,
|
||||
need_raw_log_energy ? &raw_log_energy : NULL);
|
||||
|
||||
kaldi::Vector<kaldi::BaseFloat> this_feature(computer_.Dim(),
|
||||
kaldi::kUndefined);
|
||||
computer_.Compute(raw_log_energy, vtln_warp, &window, &this_feature);
|
||||
kaldi::SubVector<kaldi::BaseFloat> output_row(
|
||||
feats->Data() + frame * Dim(), Dim());
|
||||
output_row.CopyFromVec(this_feature);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace paddleaudio
|
@ -0,0 +1,143 @@
|
||||
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/numpy.h>
|
||||
|
||||
#include "kaldi_feature_wrapper.h"
|
||||
|
||||
namespace py=pybind11;
|
||||
|
||||
bool InitFbank(
|
||||
float samp_freq, // frame opts
|
||||
float frame_shift_ms,
|
||||
float frame_length_ms,
|
||||
float dither,
|
||||
float preemph_coeff,
|
||||
bool remove_dc_offset,
|
||||
std::string window_type, // e.g. Hamming window
|
||||
bool round_to_power_of_two,
|
||||
float blackman_coeff,
|
||||
bool snip_edges,
|
||||
bool allow_downsample,
|
||||
bool allow_upsample,
|
||||
int max_feature_vectors,
|
||||
int num_bins, // mel opts
|
||||
float low_freq,
|
||||
float high_freq,
|
||||
float vtln_low,
|
||||
float vtln_high,
|
||||
bool debug_mel,
|
||||
bool htk_mode,
|
||||
bool use_energy, // fbank opts
|
||||
float energy_floor,
|
||||
bool raw_energy,
|
||||
bool htk_compat,
|
||||
bool use_log_fbank,
|
||||
bool use_power) {
|
||||
kaldi::FbankOptions opts;
|
||||
opts.frame_opts.samp_freq = samp_freq; // frame opts
|
||||
opts.frame_opts.frame_shift_ms = frame_shift_ms;
|
||||
opts.frame_opts.frame_length_ms = frame_length_ms;
|
||||
opts.frame_opts.dither = dither;
|
||||
opts.frame_opts.preemph_coeff = preemph_coeff;
|
||||
opts.frame_opts.remove_dc_offset = remove_dc_offset;
|
||||
opts.frame_opts.window_type = window_type;
|
||||
opts.frame_opts.round_to_power_of_two = round_to_power_of_two;
|
||||
opts.frame_opts.blackman_coeff = blackman_coeff;
|
||||
opts.frame_opts.snip_edges = snip_edges;
|
||||
opts.frame_opts.allow_downsample = allow_downsample;
|
||||
opts.frame_opts.allow_upsample = allow_upsample;
|
||||
opts.frame_opts.max_feature_vectors = max_feature_vectors;
|
||||
|
||||
opts.mel_opts.num_bins = num_bins; // mel opts
|
||||
opts.mel_opts.low_freq = low_freq;
|
||||
opts.mel_opts.high_freq = high_freq;
|
||||
opts.mel_opts.vtln_low = vtln_low;
|
||||
opts.mel_opts.vtln_high = vtln_high;
|
||||
opts.mel_opts.debug_mel = debug_mel;
|
||||
opts.mel_opts.htk_mode = htk_mode;
|
||||
|
||||
opts.use_energy = use_energy; // fbank opts
|
||||
opts.energy_floor = energy_floor;
|
||||
opts.raw_energy = raw_energy;
|
||||
opts.htk_compat = htk_compat;
|
||||
opts.use_log_fbank = use_log_fbank;
|
||||
opts.use_power = use_power;
|
||||
paddleaudio::KaldiFeatureWrapper::GetInstance()->InitFbank(opts);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Feeds one chunk of samples to the extractor held by the
// KaldiFeatureWrapper singleton and returns whatever it produces.
py::array_t<double> ComputeFbankStreaming(const py::array_t<double>& wav) {
    auto* wrapper = paddleaudio::KaldiFeatureWrapper::GetInstance();
    return wrapper->ComputeFbank(wav);
}
|
||||
|
||||
// One-shot variant: (re)initializes the shared extractor with the given
// options, computes the features of `wav`, then resets the extractor so no
// leftover samples stay buffered.
py::array_t<double> ComputeFbank(
    float samp_freq,  // frame opts
    float frame_shift_ms,
    float frame_length_ms,
    float dither,
    float preemph_coeff,
    bool remove_dc_offset,
    std::string window_type,  // e.g. Hamming window
    bool round_to_power_of_two,
    float blackman_coeff,
    bool snip_edges,
    bool allow_downsample,
    bool allow_upsample,
    int max_feature_vectors,
    int num_bins,  // mel opts
    float low_freq,
    float high_freq,
    float vtln_low,
    float vtln_high,
    bool debug_mel,
    bool htk_mode,
    bool use_energy,  // fbank opts
    float energy_floor,
    bool raw_energy,
    bool htk_compat,
    bool use_log_fbank,
    bool use_power,
    const py::array_t<double>& wav) {
    // Forward the options unchanged, in declaration order.
    InitFbank(samp_freq, frame_shift_ms, frame_length_ms, dither,
              preemph_coeff, remove_dc_offset, window_type,
              round_to_power_of_two, blackman_coeff, snip_edges,
              allow_downsample, allow_upsample, max_feature_vectors,
              num_bins, low_freq, high_freq, vtln_low, vtln_high,
              debug_mel, htk_mode,
              use_energy, energy_floor, raw_energy, htk_compat,
              use_log_fbank, use_power);

    py::array_t<double> fbank = ComputeFbankStreaming(wav);

    // Drop the samples buffered for streaming use.
    paddleaudio::KaldiFeatureWrapper::GetInstance()->ResetFbank();
    return fbank;
}
|
||||
|
||||
|
||||
// Clears the samples buffered inside the shared fbank extractor.
void ResetFbank() {
    auto* wrapper = paddleaudio::KaldiFeatureWrapper::GetInstance();
    wrapper->ResetFbank();
}
|
||||
|
||||
// Python bindings. The first macro argument must equal the file name of the
// built extension: the build creates the module from the `kaldi_frontend`
// target, and Python looks up the init symbol PyInit_<file name> on import,
// so any other name makes `import kaldi_frontend` fail.
PYBIND11_MODULE(kaldi_frontend, m) {
    m.doc() = "kaldi_feature example";
    m.def("InitFbank", &InitFbank, "init fbank");
    m.def("ResetFbank", &ResetFbank, "reset fbank");
    m.def("ComputeFbank", &ComputeFbank, "compute fbank");
    m.def("ComputeFbankStreaming", &ComputeFbankStreaming, "compute fbank streaming");
}
|
@ -0,0 +1,71 @@
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/numpy.h>
|
||||
|
||||
#include "kaldi_feature_wrapper.h"
|
||||
|
||||
namespace py=pybind11;
|
||||
|
||||
bool InitFbank(
|
||||
float samp_freq, // frame opts
|
||||
float frame_shift_ms,
|
||||
float frame_length_ms,
|
||||
float dither,
|
||||
float preemph_coeff,
|
||||
bool remove_dc_offset,
|
||||
std::string window_type, // e.g. Hamming window
|
||||
bool round_to_power_of_two,
|
||||
float blackman_coeff,
|
||||
bool snip_edges,
|
||||
bool allow_downsample,
|
||||
bool allow_upsample,
|
||||
int max_feature_vectors,
|
||||
int num_bins, // mel opts
|
||||
float low_freq,
|
||||
float high_freq,
|
||||
float vtln_low,
|
||||
float vtln_high,
|
||||
bool debug_mel,
|
||||
bool htk_mode,
|
||||
bool use_energy, // fbank opts
|
||||
float energy_floor,
|
||||
bool raw_energy,
|
||||
bool htk_compat,
|
||||
bool use_log_fbank,
|
||||
bool use_power);
|
||||
|
||||
py::array_t<double> ComputeFbank(
|
||||
float samp_freq, // frame opts
|
||||
float frame_shift_ms,
|
||||
float frame_length_ms,
|
||||
float dither,
|
||||
float preemph_coeff,
|
||||
bool remove_dc_offset,
|
||||
std::string window_type, // e.g. Hamming window
|
||||
bool round_to_power_of_two,
|
||||
kaldi::BaseFloat blackman_coeff,
|
||||
bool snip_edges,
|
||||
bool allow_downsample,
|
||||
bool allow_upsample,
|
||||
int max_feature_vectors,
|
||||
int num_bins, // mel opts
|
||||
float low_freq,
|
||||
float high_freq,
|
||||
float vtln_low,
|
||||
float vtln_high,
|
||||
bool debug_mel,
|
||||
bool htk_mode,
|
||||
bool use_energy, // fbank opts
|
||||
float energy_floor,
|
||||
bool raw_energy,
|
||||
bool htk_compat,
|
||||
bool use_log_fbank,
|
||||
bool use_power,
|
||||
const py::array_t<double>& wav);
|
||||
|
||||
py::array_t<double> ComputeFbankStreaming(const py::array_t<double>& wav);
|
||||
|
||||
void ResetFbank();
|
||||
|
||||
py::array_t<double> ComputeFbankStreaming(const py::array_t<double>& wav);
|
||||
|
||||
py::array_t<double> TestFun(const py::array_t<double>& wav);
|
@ -0,0 +1,57 @@
|
||||
#include "kaldi_feature_wrapper.h"
|
||||
|
||||
namespace paddleaudio {
|
||||
|
||||
// Returns the process-wide wrapper object, a function-local static that is
// constructed on the first call.
KaldiFeatureWrapper* KaldiFeatureWrapper::GetInstance() {
    static KaldiFeatureWrapper singleton;
    KaldiFeatureWrapper* const handle = &singleton;
    return handle;
}
|
||||
|
||||
// Replaces the current extractor (if any) with a new one built from `opts`.
// Always returns true.
bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) {
    fbank_ = std::make_unique<Fbank>(opts);
    return true;
}
|
||||
|
||||
// Converts `wav` to a Kaldi vector, runs the streaming fbank extractor on it
// and returns the result as a 2-D array with Dim() columns, one row per
// frame.
//
// An empty array is returned when no extractor has been created yet
// (InitFbank() not called), when the extractor reports failure, or when it
// produced no frame.
py::array_t<double> KaldiFeatureWrapper::ComputeFbank(const py::array_t<double> wav) {
    // Without an extractor there is nothing to compute (and fbank_ must not
    // be dereferenced).
    if (!fbank_) return py::array_t<double>();

    // NOTE(review): the samples are read as one contiguous run of doubles
    // starting at info.ptr; strides are not consulted - confirm callers only
    // pass contiguous arrays.
    py::buffer_info info = wav.request();
    kaldi::Vector<kaldi::BaseFloat> input_wav(info.size);
    const double* wav_ptr = static_cast<const double*>(info.ptr);
    for (int idx = 0; idx < info.size; ++idx) {
        input_wav(idx) = static_cast<kaldi::BaseFloat>(wav_ptr[idx]);
    }

    kaldi::Vector<kaldi::BaseFloat> feats;
    const bool flag = fbank_->ComputeFeature(input_wav, &feats);
    if (!flag || feats.Dim() == 0) return py::array_t<double>();

    // Copy the flat feature vector into a freshly allocated numpy array.
    auto result = py::array_t<double>(feats.Dim());
    py::buffer_info xs = result.request();
    double* res_ptr = static_cast<double*>(xs.ptr);
    for (int idx = 0; idx < feats.Dim(); ++idx) {
        res_ptr[idx] = feats(idx);
    }

    // feats holds whole frames of Dim() values each.
    return result.reshape({ feats.Dim() / Dim(), Dim()});
}
|
||||
|
||||
|
||||
} // namespace paddleaudio
|
@ -0,0 +1,28 @@
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "feature_common.h"
|
||||
#include "feat/feature-fbank.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace paddleaudio {
|
||||
|
||||
typedef StreamingFeatureTpl<kaldi::FbankComputer> Fbank;
|
||||
|
||||
class KaldiFeatureWrapper {
|
||||
public:
|
||||
static KaldiFeatureWrapper* GetInstance();
|
||||
bool InitFbank(kaldi::FbankOptions opts);
|
||||
py::array_t<double> ComputeFbank(const py::array_t<double> wav);
|
||||
int Dim() {
|
||||
return fbank_->Dim();
|
||||
}
|
||||
void ResetFbank() {
|
||||
fbank_->Reset();
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<paddleaudio::Fbank> fbank_;
|
||||
};
|
||||
|
||||
} // namespace paddleaudio
|
@ -0,0 +1,60 @@
|
||||
# Builds the subset of Kaldi needed for feature extraction as a set of
# libraries: base, util, matrix, the shared feature code, mfcc and fbank.
project(kaldi)

add_library(kaldi-base
  base/io-funcs.cc
  base/kaldi-error.cc
  base/kaldi-math.cc
  base/kaldi-utils.cc
  base/timer.cc
)

add_library(kaldi-util
  util/kaldi-holder.cc
  util/kaldi-io.cc
  util/kaldi-semaphore.cc
  util/kaldi-table.cc
  util/kaldi-thread.cc
  util/parse-options.cc
  util/simple-io-funcs.cc
  util/simple-options.cc
  util/text-utils.cc
)
target_link_libraries(kaldi-util PUBLIC kaldi-base kaldi-matrix)

add_library(kaldi-mfcc
  feat/feature-mfcc.cc
)
target_link_libraries(kaldi-mfcc PUBLIC kaldi-feat-common)

add_library(kaldi-fbank
  feat/feature-fbank.cc
)
target_link_libraries(kaldi-fbank PUBLIC kaldi-feat-common)

add_library(kaldi-feat-common
  feat/wave-reader.cc
  feat/signal.cc
  feat/feature-functions.cc
  feat/feature-window.cc
  feat/resample.cc
  feat/mel-computations.cc
  feat/cmvn.cc
)
target_link_libraries(kaldi-feat-common PUBLIC kaldi-base kaldi-matrix kaldi-util)

add_library(kaldi-matrix
  matrix/compressed-matrix.cc
  matrix/kaldi-matrix.cc
  matrix/kaldi-vector.cc
  matrix/matrix-functions.cc
  matrix/optimization.cc
  matrix/packed-matrix.cc
  matrix/qr.cc
  matrix/sparse-matrix.cc
  matrix/sp-matrix.cc
  matrix/srfft.cc
  matrix/tp-matrix.cc
)

# Prefer the imported `openblas` target when the enclosing build defines it:
# it carries the dependency on the external OpenBLAS build, so nothing links
# before libopenblas.a exists. Fall back to the bare archive name (resolved
# through the link directories) otherwise.
if(TARGET openblas)
  set(kaldi_openblas_lib openblas)
else()
  set(kaldi_openblas_lib libopenblas.a)
endif()
# gfortran is listed after OpenBLAS because the static archive is the one
# that references the Fortran runtime.
target_link_libraries(kaldi-matrix PUBLIC kaldi-base ${kaldi_openblas_lib} gfortran)
|
||||
|
@ -0,0 +1 @@
|
||||
../../../../speechx/speechx/kaldi/base
|
@ -0,0 +1 @@
|
||||
../../../../speechx/speechx/kaldi/feat
|
@ -0,0 +1 @@
|
||||
../../../../speechx/speechx/kaldi/matrix
|
@ -0,0 +1 @@
|
||||
../../../../speechx/speechx/kaldi/util
|
Loading…
Reference in new issue