Merge pull request #2094 from zh794390558/kaldi

[audio] build with kaldi pybind
pull/2102/head
YangZhou 3 years ago committed by GitHub
commit 3dcf92356b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -13,8 +13,10 @@ if(NOT CMAKE_VERSION VERSION_LESS 3.15.0)
cmake_policy(SET CMP0092 NEW)
endif()
project(paddlespeech)
# check and set CMAKE_CXX_STANDARD
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
if(env_cxx_standard GREATER -1)
@ -27,25 +29,40 @@ endif()
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_C_STANDARD 11)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_VERBOSE_MAKEFILE ON)
# Options
option(BUILD_SOX "Build libsox statically" ON)
option(BUILD_MAD "Enable libmad" ON)
option(BUILD_KALDI "Build kaldi statically" ON)
option(BUILD_PADDLEAUDIO_PYTHON_EXTENSION "Build Python extension" ON)
set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/cmake")
# cmake
set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}/cmake;${PROJECT_SOURCE_DIR}/cmake/external")
# fc_patch dir
set(FETCHCONTENT_QUIET off)
get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_patch})
include(openblas)
# include(pybind)
# packages
# Locate the Python 3 interpreter/development files and pybind11. Neither
# lookup is marked REQUIRED, so a missing package does not stop configuration.
find_package(Python3 COMPONENTS Interpreter Development)
find_package(pybind11 CONFIG)
# The Python3 lookup publishes its results with the Python3_ prefix; the
# previously printed Python_INCLUDE_DIR is not set by it and was always empty.
message(STATUS "Python3_INCLUDE_DIRS=${Python3_INCLUDE_DIRS}")
add_subdirectory(paddlespeech/audio/third_party)
add_subdirectory(paddlespeech/audio/src)
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O0 -Wall -g")
add_subdirectory(paddlespeech/audio)
# Summary
include(cmake/summary.cmake)
onnx_print_configuration_summary()

3
audio/.gitignore vendored

@ -1,3 +0,0 @@
build
third_party/archives/
third_party/install/

@ -1,14 +0,0 @@
# Top-level build script of the standalone paddleaudio native extension.
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
project(paddleaudio VERSION 0.1)
# Store the index of the first "-std=c++" occurrence in CMAKE_CXX_FLAGS
# (-1 when the flag is absent) in env_cxx_standard.
# NOTE(review): env_cxx_standard is not read again in this file - confirm
# whether a subdirectory depends on it.
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
# Language standards applied to targets created below this point.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_C_STANDARD 11)
# Emit compile_commands.json for tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Build all targets as position-independent code.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
# User switch consulted by the subdirectories to enable the libsox build.
option(BUILD_SOX "Build libsox statically" ON)
# third_party is added first so its targets exist before audio/csrc is processed.
add_subdirectory(third_party)
add_subdirectory(audio/csrc)

@ -1,34 +0,0 @@
find_package(Python3 COMPONENTS Interpreter Development)
find_package(pybind11 CONFIG)
# define_extension(<name> <sources> <libraries>)
#
# Create the shared library target <name>, link it, and install it.
#
# Arguments:
#   name      - target name; also the output file stem because the default
#               "lib" prefix is cleared.
#   sources   - list of source files (pass as one quoted list argument).
#   libraries - list of libraries/targets to link (pass as one quoted list).
#
# Example:
#   define_extension(_paddleaudio "${EXTENSION_SOURCES}" "${LINK_LIBRARIES}")
function(define_extension name sources libraries)
  add_library(${name} SHARED ${sources})
  target_include_directories(
    ${name}
    PRIVATE
      ${PROJECT_SOURCE_DIR}
      ${CMAKE_CURRENT_SOURCE_DIR}
      ${Python3_INCLUDE_DIRS}
      ${pybind11_INCLUDE_DIR}
  )
  # Keyword signature: the dependencies are only needed to build the
  # extension itself, and the keyword-less form has legacy link-interface
  # semantics that must not be mixed with keyword calls on the same target.
  target_link_libraries(${name} PRIVATE ${libraries})
  # Drop the "lib" prefix so the file name equals the target name.
  set_target_properties(${name} PROPERTIES PREFIX "")
  install(
    TARGETS ${name}
    LIBRARY DESTINATION .
  )
endfunction()
# The _paddleaudio extension is only defined when the sox backend is enabled.
if(BUILD_SOX)
  # Binding entry point plus the sox-backed implementation files.
  set(EXTENSION_SOURCES pybind/pybind.cpp pybind/sox/io.cpp pybind/sox/utils.cpp)
  # Targets the extension links against.
  set(LINK_LIBRARIES libsox)
  define_extension(_paddleaudio "${EXTENSION_SOURCES}" "${LINK_LIBRARIES}")
endif()

@ -1,10 +0,0 @@
#include "pybind/sox/io.h"
// Python entry point of the _paddleaudio module: registers the two sox
// metadata readers under their Python-visible names.
PYBIND11_MODULE(_paddleaudio, m) {
    namespace io = paddleaudio::sox_io;

    m.def("get_info_file", &io::get_info_file, "Get metadata of audio file.");
    m.def("get_info_fileobj",
          &io::get_info_fileobj,
          "Get metadata of audio in file object.");
}

@ -1,61 +0,0 @@
#include "pybind/sox/io.h"
#include "pybind/sox/utils.h"
using namespace paddleaudio::sox_utils;
namespace paddleaudio {
namespace sox_io {
// Opens the audio file at `path` through libsox and returns its metadata as
// (sample rate, samples per channel, channel count, bits per sample,
// encoding name). An empty `format` passes a null filetype to sox_open_read.
// validate_input_file throws std::runtime_error when the file could not be
// opened or its encoding is unknown.
auto get_info_file(const std::string &path, const std::string &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
    const char *filetype = nullptr;
    if (!format.empty()) {
        filetype = format.data();
    }

    SoxFormat sf(sox_open_read(path.data(),
                               /*signal=*/nullptr,
                               /*encoding=*/nullptr,
                               filetype));
    validate_input_file(sf, path);

    const auto sample_rate = static_cast<int64_t>(sf->signal.rate);
    const auto num_frames =
        static_cast<int64_t>(sf->signal.length / sf->signal.channels);
    const auto num_channels = static_cast<int64_t>(sf->signal.channels);
    const auto bits_per_sample =
        static_cast<int64_t>(sf->encoding.bits_per_sample);

    return std::make_tuple(sample_rate,
                           num_frames,
                           num_channels,
                           bits_per_sample,
                           get_encoding(sf->encoding.encoding));
}
// Reads the leading bytes of a Python file-like object into memory, lets
// libsox parse them, and returns (sample rate, samples per channel, channel
// count, bits per sample, encoding name). An empty `format` passes a null
// filetype to sox_open_mem_read.
auto get_info_fileobj(py::object fileobj, const std::string &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
    // Read at least 4096 bytes, or the libsox buffer size when it is larger.
    const int64_t kDefaultCapacityInBytes = 4096;
    const int64_t sox_bufsiz = get_buffer_size();
    int64_t capacity = kDefaultCapacityInBytes;
    if (sox_bufsiz > kDefaultCapacityInBytes) {
        capacity = sox_bufsiz;
    }

    std::string buffer(capacity, '\0');
    char *buf = const_cast<char *>(buffer.data());
    const uint64_t num_read = read_fileobj(&fileobj, capacity, buf);

    // If the file is shorter than 256, then libsox cannot read the header.
    uint64_t buf_size = 256;
    if (num_read > 256) {
        buf_size = num_read;
    }

    const char *filetype = format.empty() ? nullptr : format.data();
    SoxFormat sf(sox_open_mem_read(buf,
                                   buf_size,
                                   /*signal=*/nullptr,
                                   /*encoding=*/nullptr,
                                   filetype));

    // In case of streamed data, length can be 0
    validate_input_memfile(sf);

    const auto sample_rate = static_cast<int64_t>(sf->signal.rate);
    const auto num_frames =
        static_cast<int64_t>(sf->signal.length / sf->signal.channels);
    const auto num_channels = static_cast<int64_t>(sf->signal.channels);
    const auto bits_per_sample =
        static_cast<int64_t>(sf->encoding.bits_per_sample);

    return std::make_tuple(sample_rate,
                           num_frames,
                           num_channels,
                           bits_per_sample,
                           get_encoding(sf->encoding.encoding));
}
} // namespace sox_io
} // namespace paddleaudio

@ -1,18 +0,0 @@
#ifndef PADDLEAUDIO_PYBIND_SOX_IO_H
#define PADDLEAUDIO_PYBIND_SOX_IO_H
#include "pybind/sox/utils.h"
namespace paddleaudio {
namespace sox_io {
// Returns (sample rate, samples per channel, channel count, bits per sample,
// encoding name) for the audio file at `path`. An empty `format` lets libsox
// receive a null filetype.
auto get_info_file(const std::string &path, const std::string &format)
-> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;
// Same metadata tuple as get_info_file, but the audio bytes are obtained by
// calling read() on the Python file-like object `fileobj`.
auto get_info_fileobj(py::object fileobj, const std::string &format)
-> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;
} // namespace sox_io
} // namespace paddleaudio
#endif

@ -1,98 +0,0 @@
#include "pybind/sox/utils.h"
#include <sstream>
namespace paddleaudio {
namespace sox_utils {
// Takes ownership of an already opened (or null) sox handle.
SoxFormat::SoxFormat(sox_format_t *fd) noexcept : fd_(fd) {}

// Releases the handle, if any, when the wrapper goes out of scope.
SoxFormat::~SoxFormat() {
    close();
}

// Member access on the wrapped handle.
sox_format_t *SoxFormat::operator->() const noexcept {
    return fd_;
}

// Conversion to the raw handle so the wrapper can be handed to libsox calls.
SoxFormat::operator sox_format_t *() const noexcept {
    return fd_;
}

// Closes the handle once; calling it again is a no-op.
void SoxFormat::close() {
    if (fd_ == nullptr) {
        return;
    }
    sox_close(fd_);
    fd_ = nullptr;
}
// Fills `buffer` with up to `size` bytes by repeatedly calling read() on the
// Python file-like object. Stops early when read() returns no data and
// returns the number of bytes copied. Throws std::runtime_error when read()
// returns more bytes than were requested.
auto read_fileobj(py::object *fileobj, const uint64_t size, char *buffer)
    -> uint64_t {
    char *cursor = buffer;
    uint64_t total = 0;
    for (;;) {
        if (total >= size) {
            break;
        }
        const auto wanted = size - total;
        const auto piece = static_cast<std::string>(
            static_cast<py::bytes>(fileobj->attr("read")(wanted)));
        const auto got = piece.length();
        if (got == 0) {
            // End of stream.
            break;
        }
        if (got > wanted) {
            std::ostringstream message;
            message
                << "Requested up to " << wanted << " bytes but, "
                << "received " << got << " bytes. "
                << "The given object does not confirm to read protocol of file "
                   "object.";
            throw std::runtime_error(message.str());
        }
        memcpy(cursor, piece.data(), got);
        cursor += got;
        total += got;
    }
    return total;
}
// Returns the `bufsiz` field of the libsox global settings.
int64_t get_buffer_size() {
    const auto *globals = sox_get_globals();
    return globals->bufsiz;
}
void validate_input_file(const SoxFormat &sf, const std::string &path) {
if (static_cast<sox_format_t *>(sf) == nullptr) {
throw std::runtime_error(
"Error loading audio file: failed to open file " + path);
}
if (sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
throw std::runtime_error("Error loading audio file: unknown encoding.");
}
}
// Same checks as validate_input_file, using a placeholder name in the error
// message because an in-memory stream has no path.
void validate_input_memfile(const SoxFormat &sf) {
    const std::string source_name = "<in memory buffer>";
    validate_input_file(sf, source_name);
}
// Maps a libsox encoding enum to the short name returned to callers.
// Values without an explicit mapping yield "UNKNOWN".
std::string get_encoding(sox_encoding_t encoding) {
    const char *label = "UNKNOWN";
    switch (encoding) {
        case SOX_ENCODING_SIGN2:
            label = "PCM_S";
            break;
        case SOX_ENCODING_UNSIGNED:
            label = "PCM_U";
            break;
        case SOX_ENCODING_FLOAT:
            label = "PCM_F";
            break;
        case SOX_ENCODING_FLAC:
            label = "FLAC";
            break;
        case SOX_ENCODING_ULAW:
            label = "ULAW";
            break;
        case SOX_ENCODING_ALAW:
            label = "ALAW";
            break;
        case SOX_ENCODING_MP3:
            label = "MP3";
            break;
        case SOX_ENCODING_VORBIS:
            label = "VORBIS";
            break;
        case SOX_ENCODING_AMR_WB:
            label = "AMR_WB";
            break;
        case SOX_ENCODING_AMR_NB:
            label = "AMR_NB";
            break;
        case SOX_ENCODING_OPUS:
            label = "OPUS";
            break;
        case SOX_ENCODING_GSM:
            label = "GSM";
            break;
        case SOX_ENCODING_UNKNOWN:
        default:
            break;
    }
    return label;
}
} // namespace sox_utils
} // namespace paddleaudio

@ -1,42 +0,0 @@
#ifndef PADDLEAUDIO_PYBIND_SOX_UTILS_H
#define PADDLEAUDIO_PYBIND_SOX_UTILS_H
#include <pybind11/pybind11.h>
#include <sox.h>
namespace py = pybind11;
namespace paddleaudio {
namespace sox_utils {
/// helper class to automatically close sox_format_t*
/// The destructor calls close(), which calls sox_close on a non-null handle.
/// Copy and move are deleted, so each handle has a single wrapper.
struct SoxFormat {
explicit SoxFormat(sox_format_t *fd) noexcept;
SoxFormat(const SoxFormat &other) = delete;
SoxFormat(SoxFormat &&other) = delete;
SoxFormat &operator=(const SoxFormat &other) = delete;
SoxFormat &operator=(SoxFormat &&other) = delete;
~SoxFormat();
// Access to the wrapped handle (may be null when opening failed).
sox_format_t *operator->() const noexcept;
operator sox_format_t *() const noexcept;
// Closes the handle if it is still open and resets it to null.
void close();
private:
// Wrapped libsox handle; nullptr after close().
sox_format_t *fd_;
};
// Copies up to `size` bytes obtained from fileobj.read() into `buffer` and
// returns the number of bytes copied. Throws std::runtime_error when read()
// returns more data than requested.
auto read_fileobj(py::object *fileobj, uint64_t size, char *buffer) -> uint64_t;
// Returns the `bufsiz` value of the libsox global settings.
int64_t get_buffer_size();
// Throws std::runtime_error when `sf` holds a null handle or an unknown encoding.
void validate_input_file(const SoxFormat &sf, const std::string &path);
// Same validation for in-memory input; the error message uses a placeholder name.
void validate_input_memfile(const SoxFormat &sf);
// Returns the short textual name for a libsox encoding ("UNKNOWN" as fallback).
std::string get_encoding(sox_encoding_t encoding);
} // namespace sox_utils
} // namespace paddleaudio
#endif

@ -1,9 +0,0 @@
# Compile C++ sources from this directory downward with hidden symbol visibility.
string(APPEND CMAKE_CXX_FLAGS " -fvisibility=hidden")

# libsox is an interface target that forwards the sox headers and static
# archives to whatever links against it. It stays empty when BUILD_SOX is off.
add_library(libsox INTERFACE)
if(BUILD_SOX)
  # The sox subdirectory exports SOX_INCLUDE_DIR and SOX_LIBRARIES to this scope.
  add_subdirectory(sox)
  target_link_libraries(libsox INTERFACE ${SOX_LIBRARIES})
  target_include_directories(libsox INTERFACE ${SOX_INCLUDE_DIR})
endif()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,86 +0,0 @@
See the following for the origin of this patch
http://www.linuxfromscratch.org/blfs/view/svn/multimedia/libmad.html
http://www.linuxfromscratch.org/patches/blfs/svn/libmad-0.15.1b-fixes-1.patch
--- src/libmad/configure 2004-02-05 09:34:07.000000000 +0000
+++ src/libmad/configure.new 2020-06-30 21:10:28.528018931 +0000
@@ -19083,71 +19083,7 @@
if test "$GCC" = yes
then
- if test -z "$arch"
- then
- case "$host" in
- i386-*) ;;
- i?86-*) arch="-march=i486" ;;
- arm*-empeg-*) arch="-march=armv4 -mtune=strongarm1100" ;;
- armv4*-*) arch="-march=armv4 -mtune=strongarm" ;;
- powerpc-*) ;;
- mips*-agenda-*) arch="-mcpu=vr4100" ;;
- mips*-luxsonor-*) arch="-mips1 -mcpu=r3000 -Wa,-m4010" ;;
- esac
- fi
-
- case "$optimize" in
- -O|"-O "*)
- optimize="-O"
- optimize="$optimize -fforce-mem"
- optimize="$optimize -fforce-addr"
- : #x optimize="$optimize -finline-functions"
- : #- optimize="$optimize -fstrength-reduce"
- optimize="$optimize -fthread-jumps"
- optimize="$optimize -fcse-follow-jumps"
- optimize="$optimize -fcse-skip-blocks"
- : #x optimize="$optimize -frerun-cse-after-loop"
- : #x optimize="$optimize -frerun-loop-opt"
- : #x optimize="$optimize -fgcse"
- optimize="$optimize -fexpensive-optimizations"
- optimize="$optimize -fregmove"
- : #* optimize="$optimize -fdelayed-branch"
- : #x optimize="$optimize -fschedule-insns"
- optimize="$optimize -fschedule-insns2"
- : #? optimize="$optimize -ffunction-sections"
- : #? optimize="$optimize -fcaller-saves"
- : #> optimize="$optimize -funroll-loops"
- : #> optimize="$optimize -funroll-all-loops"
- : #x optimize="$optimize -fmove-all-movables"
- : #x optimize="$optimize -freduce-all-givs"
- : #? optimize="$optimize -fstrict-aliasing"
- : #* optimize="$optimize -fstructure-noalias"
-
- case "$host" in
- arm*-*)
- optimize="$optimize -fstrength-reduce"
- ;;
- mips*-*)
- optimize="$optimize -fstrength-reduce"
- optimize="$optimize -finline-functions"
- ;;
- i?86-*)
- optimize="$optimize -fstrength-reduce"
- ;;
- powerpc-apple-*)
- # this triggers an internal compiler error with gcc2
- : #optimize="$optimize -fstrength-reduce"
-
- # this is really only beneficial with gcc3
- : #optimize="$optimize -finline-functions"
- ;;
- *)
- # this sometimes provokes bugs in gcc 2.95.2
- : #optimize="$optimize -fstrength-reduce"
- ;;
- esac
- ;;
- esac
+ optimize="-O2"
fi
case "$host" in
@@ -21497,6 +21433,7 @@
then
case "$host" in
i?86-*) FPM="INTEL" ;;
+ x86_64*) FPM="64BIT" ;;
arm*-*) FPM="ARM" ;;
mips*-*) FPM="MIPS" ;;
sparc*-*) FPM="SPARC" ;;

@ -1,16 +0,0 @@
See https://github.com/pytorch/audio/pull/1297
diff -ru sox/src/formats.c sox/src/formats.c
--- sox/src/formats.c 2014-10-26 19:55:50.000000000 -0700
+++ sox/src/formats.c 2021-02-22 16:01:02.833144070 -0800
@@ -333,6 +333,10 @@
assert(ft);
if (!ft->fp)
return sox_false;
- fstat(fileno((FILE*)ft->fp), &st);
+ int fd = fileno((FILE*)ft->fp);
+ if (fd < 0)
+ return sox_false;
+ if (fstat(fd, &st) < 0)
+ return sox_false;
return ((st.st_mode & S_IFMT) == S_IFREG);
}

@ -1,222 +0,0 @@
# Shared setup for the ExternalProject builds of sox and its codec libraries.
find_package(PkgConfig REQUIRED)
include(ExternalProject)
# All external projects install into one shared prefix next to this directory.
set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../install)
# Downloaded source archives are kept here.
set(ARCHIVE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../archives)
# Location of the patch files and the config.guess/config.sub replacements.
set(patch_dir ${PROJECT_SOURCE_DIR}/third_party/patches)
# configure arguments common to every project: static-only, PIC, no docs/examples.
set(COMMON_ARGS --quiet --disable-shared --enable-static --prefix=${INSTALL_DIR} --with-pic --disable-dependency-tracking --disable-debug --disable-examples --disable-doc)
# To pass custom environment variables to ExternalProject_Add command,
# we need to do `${CMAKE_COMMAND} -E env ${envs} <COMMAND>`.
# https://stackoverflow.com/a/62437353
# We construct the custom environment variables here
# They point pkg-config, the linker and the compiler at INSTALL_DIR so that
# later projects find the libraries installed by earlier ones.
set(envs
"PKG_CONFIG_PATH=${INSTALL_DIR}/lib/pkgconfig"
"LDFLAGS=-L${INSTALL_DIR}/lib $ENV{LDFLAGS}"
"CFLAGS=-I${INSTALL_DIR}/include -fvisibility=hidden $ENV{CFLAGS}"
)
# libmad 0.15.1b: apply libmad.patch, refresh config.guess/config.sub, then
# configure with the shared arguments.
ExternalProject_Add(mad
  URL https://downloads.sourceforge.net/project/mad/libmad/0.15.1b/libmad-0.15.1b.tar.gz
  URL_HASH SHA256=bbfac3ed6bfbc2823d3775ebb931087371e142bb0e9bb1bee51a76a6e0078690
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  DOWNLOAD_NO_PROGRESS ON
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  PATCH_COMMAND
    patch < ${patch_dir}/libmad.patch &&
    cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/mad/
  CONFIGURE_COMMAND
    ${CMAKE_COMMAND} -E env ${envs}
    ${CMAKE_CURRENT_BINARY_DIR}/src/mad/configure ${COMMON_ARGS}
  # Capture every step in log files; print them only when a step fails.
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)
# opencore-amr 0.1.5: refresh config.guess/config.sub, then configure with
# the shared arguments.
ExternalProject_Add(amr
  URL https://sourceforge.net/projects/opencore-amr/files/opencore-amr/opencore-amr-0.1.5.tar.gz
  URL_HASH SHA256=2c006cb9d5f651bfb5e60156dbff6af3c9d35c7bbcc9015308c0aff1e14cd341
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  DOWNLOAD_NO_PROGRESS ON
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  PATCH_COMMAND
    cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/amr/
  CONFIGURE_COMMAND
    ${CMAKE_COMMAND} -E env ${envs}
    ${CMAKE_CURRENT_BINARY_DIR}/src/amr/configure ${COMMON_ARGS}
  # Capture every step in log files; print them only when a step fails.
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)
# lame 3.99.5: refresh config.guess/config.sub, then configure with the
# shared arguments plus --enable-nasm.
ExternalProject_Add(lame
  URL https://downloads.sourceforge.net/project/lame/lame/3.99/lame-3.99.5.tar.gz
  URL_HASH SHA256=24346b4158e4af3bd9f2e194bb23eb473c75fb7377011523353196b19b9a23ff
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  DOWNLOAD_NO_PROGRESS ON
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  PATCH_COMMAND
    cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/lame/
  CONFIGURE_COMMAND
    ${CMAKE_COMMAND} -E env ${envs}
    ${CMAKE_CURRENT_BINARY_DIR}/src/lame/configure ${COMMON_ARGS} --enable-nasm
  # Capture every step in log files; print them only when a step fails.
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)
# libogg 1.3.3: refresh config.guess/config.sub, then configure with the
# shared arguments. flac, vorbis and opus declare a dependency on this project.
ExternalProject_Add(ogg
  URL https://ftp.osuosl.org/pub/xiph/releases/ogg/libogg-1.3.3.tar.gz
  URL_HASH SHA256=c2e8a485110b97550f453226ec644ebac6cb29d1caef2902c007edab4308d985
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  DOWNLOAD_NO_PROGRESS ON
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  PATCH_COMMAND
    cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/ogg/
  CONFIGURE_COMMAND
    ${CMAKE_COMMAND} -E env ${envs}
    ${CMAKE_CURRENT_BINARY_DIR}/src/ogg/configure ${COMMON_ARGS}
  # Capture every step in log files; print them only when a step fails.
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)
# flac 1.3.2: built after ogg; configured with ogg support and without the
# C++ libraries.
ExternalProject_Add(flac
  URL https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.2.tar.xz
  URL_HASH SHA256=91cfc3ed61dc40f47f050a109b08610667d73477af6ef36dcad31c31a4a8d53f
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  DOWNLOAD_NO_PROGRESS ON
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ogg
  PATCH_COMMAND
    cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/flac/
  CONFIGURE_COMMAND
    ${CMAKE_COMMAND} -E env ${envs}
    ${CMAKE_CURRENT_BINARY_DIR}/src/flac/configure ${COMMON_ARGS} --with-ogg --disable-cpplibs
  # Capture every step in log files; print them only when a step fails.
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)
# libvorbis 1.3.6: built after ogg and configured with ogg support.
ExternalProject_Add(vorbis
  URL https://ftp.osuosl.org/pub/xiph/releases/vorbis/libvorbis-1.3.6.tar.gz
  URL_HASH SHA256=6ed40e0241089a42c48604dc00e362beee00036af2d8b3f46338031c9e0351cb
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  DOWNLOAD_NO_PROGRESS ON
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ogg
  PATCH_COMMAND
    cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/vorbis/
  CONFIGURE_COMMAND
    ${CMAKE_COMMAND} -E env ${envs}
    ${CMAKE_CURRENT_BINARY_DIR}/src/vorbis/configure ${COMMON_ARGS} --with-ogg
  # Capture every step in log files; print them only when a step fails.
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)
# opus 1.3.1: built after ogg and configured with ogg support.
ExternalProject_Add(opus
  URL https://ftp.osuosl.org/pub/xiph/releases/opus/opus-1.3.1.tar.gz
  URL_HASH SHA256=65b58e1e25b2a114157014736a3d9dfeaad8d41be1c8179866f144a2fb44ff9d
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  DOWNLOAD_NO_PROGRESS ON
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ogg
  PATCH_COMMAND
    cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/opus/
  CONFIGURE_COMMAND
    ${CMAKE_COMMAND} -E env ${envs}
    ${CMAKE_CURRENT_BINARY_DIR}/src/opus/configure ${COMMON_ARGS} --with-ogg
  # Capture every step in log files; print them only when a step fails.
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)
# opusfile 0.12: built after opus and configured with HTTP support disabled.
ExternalProject_Add(opusfile
  URL https://ftp.osuosl.org/pub/xiph/releases/opus/opusfile-0.12.tar.gz
  URL_HASH SHA256=118d8601c12dd6a44f52423e68ca9083cc9f2bfe72da7a8c1acb22a80ae3550b
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  DOWNLOAD_NO_PROGRESS ON
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS opus
  PATCH_COMMAND
    cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/opusfile/
  CONFIGURE_COMMAND
    ${CMAKE_COMMAND} -E env ${envs}
    ${CMAKE_CURRENT_BINARY_DIR}/src/opusfile/configure ${COMMON_ARGS} --disable-http
  # Capture every step in log files; print them only when a step fails.
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)
# Extra configure switches for sox: enable the codecs built by the external
# projects in this file and turn off the remaining optional backends.
set(SOX_OPTIONS
--disable-openmp
# Codecs provided by the amr, flac, lame, mad, vorbis and opus projects.
--with-amrnb
--with-amrwb
--with-flac
--with-lame
--with-mad
--with-oggvorbis
--with-opus
# Optional features that are explicitly switched off.
--without-alsa
--without-ao
--without-coreaudio
--without-oss
--without-id3tag
--without-ladspa
--without-magic
--without-png
--without-pulseaudio
--without-sndfile
--without-sndio
--without-sunaudio
--without-waveaudio
--without-wavpack
--without-twolame
)
# Static archives that consumers of libsox must link, all taken from the
# shared install prefix. libsox.a is listed first and libogg.a last.
# NOTE(review): the order presumably reflects static link dependencies -
# keep it unless verified otherwise.
set(SOX_LIBRARIES
${INSTALL_DIR}/lib/libsox.a
${INSTALL_DIR}/lib/libopencore-amrnb.a
${INSTALL_DIR}/lib/libopencore-amrwb.a
${INSTALL_DIR}/lib/libmad.a
${INSTALL_DIR}/lib/libmp3lame.a
${INSTALL_DIR}/lib/libFLAC.a
${INSTALL_DIR}/lib/libopusfile.a
${INSTALL_DIR}/lib/libopus.a
${INSTALL_DIR}/lib/libvorbisenc.a
${INSTALL_DIR}/lib/libvorbisfile.a
${INSTALL_DIR}/lib/libvorbis.a
${INSTALL_DIR}/lib/libogg.a
)
# sox 14.4.2: built after all codec projects; applies sox.patch, refreshes
# config.guess/config.sub and configures with the shared arguments plus
# SOX_OPTIONS. The archives in SOX_LIBRARIES are declared as byproducts.
ExternalProject_Add(sox
  URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2
  URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  DOWNLOAD_NO_PROGRESS ON
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ogg flac vorbis opusfile lame mad amr
  PATCH_COMMAND
    patch -p1 < ${patch_dir}/sox.patch &&
    cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/sox/
  CONFIGURE_COMMAND
    ${CMAKE_COMMAND} -E env ${envs}
    ${CMAKE_CURRENT_BINARY_DIR}/src/sox/configure ${COMMON_ARGS} ${SOX_OPTIONS}
  BUILD_BYPRODUCTS ${SOX_LIBRARIES}
  # Capture every step in log files; print them only when a step fails.
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)
# Make the libsox interface target wait for the sox external project to build.
add_dependencies(libsox sox)
# Export the header directory and archive list to the parent directory scope,
# where they are attached to the libsox target.
set(SOX_INCLUDE_DIR ${INSTALL_DIR}/include PARENT_SCOPE)
set(SOX_LIBRARIES ${SOX_LIBRARIES} PARENT_SCOPE)

@ -142,4 +142,12 @@ endif()
mark_as_advanced(LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES
LIBGOMP_LIBRARIES LIBGOMP_INCLUDE_DIR
GFORTRAN_LIBRARIES_DIR GFORTRAN_INCLUDE_DIR)
# FindGFortranLIBS.cmake ends here
# FindGFortranLIBS.cmake ends here
message(STATUS LIBGFORTRAN_LIBRARIES= ${LIBGFORTRAN_LIBRARIES})
message(STATUS LIBQUADMATH_LIBRARIES= ${LIBQUADMATH_LIBRARIES})
message(STATUS LIBGOMP_LIBRARIES= ${LIBGOMP_LIBRARIES})
message(STATUS LIBGOMP_INCLUDE_DIR= ${LIBGOMP_INCLUDE_DIR})
message(STATUS GFORTRAN_LIBRARIES_DIR= ${GFORTRAN_LIBRARIES_DIR})
message(STATUS GFORTRAN_INCLUDE_DIR= ${GFORTRAN_INCLUDE_DIR})

@ -1,4 +1,5 @@
include(FetchContent)
include(ExternalProject)
set(OpenBLAS_SOURCE_DIR ${fc_patch}/OpenBLAS-src)
set(OpenBLAS_PREFIX ${fc_patch}/OpenBLAS-prefix)
@ -47,12 +48,19 @@ set(OpenBLAS_INSTALL_PREFIX ${INSTALL_DIR})
add_library(openblas STATIC IMPORTED)
add_dependencies(openblas OPENBLAS)
set_target_properties(openblas PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES Fortran)
# ${CMAKE_INSTALL_LIBDIR} lib
set_target_properties(openblas PROPERTIES IMPORTED_LOCATION ${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a)
set_target_properties(openblas PROPERTIES IMPORTED_LOCATION ${OpenBLAS_INSTALL_PREFIX}/lib/libopenblas.a)
# https://cmake.org/cmake/help/latest/command/install.html?highlight=cmake_install_libdir#installing-targets
# ${CMAKE_INSTALL_LIBDIR} lib
# ${CMAKE_INSTALL_INCLUDEDIR} include
link_directories(${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
include_directories(${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/openblas)
link_directories(${OpenBLAS_INSTALL_PREFIX}/lib)
include_directories(${OpenBLAS_INSTALL_PREFIX}/include/openblas)
set(OPENBLAS_LIBRARIES
${OpenBLAS_INSTALL_PREFIX}/lib/libopenblas.a
)
add_library(libopenblas INTERFACE)
add_dependencies(libopenblas openblas)
target_include_directories(libopenblas INTERFACE ${OpenBLAS_INSTALL_PREFIX}/include/openblas)
target_link_libraries(libopenblas INTERFACE ${OPENBLAS_LIBRARIES})

@ -1,4 +1,6 @@
include(FetchContent)
include(ExternalProject)
FetchContent_Declare(
pybind
URL https://github.com/pybind/pybind11/archive/refs/tags/v2.9.0.zip

@ -33,9 +33,8 @@ function (onnx_print_configuration_summary)
message(STATUS " Protobuf includes : ${PROTOBUF_INCLUDE_DIRS}")
message(STATUS " Protobuf libraries : ${PROTOBUF_LIBRARIES}")
message(STATUS " BUILD_ONNX_PYTHON : ${BUILD_ONNX_PYTHON}")
if (${BUILD_ONNX_PYTHON})
message(STATUS " Python version : ${PY_VERSION}")
message(STATUS " Python executable : ${PYTHON_EXECUTABLE}")
message(STATUS " Python includes : ${PYTHON_INCLUDE_DIR}")
endif()
message(STATUS " Python version : ${Python_VERSION}")
message(STATUS " Python executable : ${Python_EXECUTABLE}")
message(STATUS " Python includes : ${Python_INCLUDE_DIRS}")
endfunction()

@ -1,39 +1,3 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
project(paddleaudio VERSION 0.1)
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
# cmake dir
set(paddleaudio_cmake_dir ${PROJECT_SOURCE_DIR}/cmake)
# Modules
list(APPEND CMAKE_MODULE_PATH ${paddleaudio_cmake_dir}/external)
list(APPEND CMAKE_MODULE_PATH ${paddleaudio_cmake_dir})
include(FetchContent)
include(ExternalProject)
# fc_patch dir
set(FETCHCONTENT_QUIET off)
get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_patch})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -fPIC -O0 -Wall -g")
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_C_STANDARD 11)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
option(BUILD_SOX "Build libsox statically" ON)
# checkout the thirdparty/kaldi/base/kaldi-types.h
# compile kaldi without openfst
add_definitions("-DCOMPILE_WITHOUT_OPENFST")
include(openblas)
include(pybind)
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/third_party/kaldi)
include_directories(/usr/include/python3.7m)
add_subdirectory(third_party)
add_subdirectory(csrc)
add_subdirectory(src)

@ -108,12 +108,9 @@ def is_soundfile_available():
def requires_soundfile():
if is_soundfile_available():
def decorator(func):
return func
else:
def decorator(func):
@wraps(func)
def wrapped(*args, **kwargs):
@ -131,12 +128,9 @@ def is_sox_available():
def requires_sox():
if is_sox_available():
def decorator(func):
return func
else:
def decorator(func):
@wraps(func)
def wrapped(*args, **kwargs):

@ -35,11 +35,6 @@ if(BUILD_SOX)
list(
APPEND
LIBPADDLEAUDIO_SOURCES
# sox/io.cpp
# sox/utils.cpp
# sox/effects.cpp
# sox/effects_chain.cpp
# sox/types.cpp
)
list(
APPEND
@ -49,6 +44,20 @@ if(BUILD_SOX)
endif()
if(BUILD_KALDI)
list(
APPEND
LIBPADDLEAUDIO_LINK_LIBRARIES
libkaldi
)
list(
APPEND
LIBPADDLEAUDIO_COMPILE_DEFINITIONS
INCLUDE_KALDI
COMPILE_WITHOUT_OPENFST
)
endif()
#------------------------------------------------------------------------------#
# END OF CUSTOMIZATION LOGICS
#------------------------------------------------------------------------------#
@ -79,9 +88,9 @@ define_library(
)
if (APPLE)
set(TORCHAUDIO_LIBRARY libtorchaudio CACHE INTERNAL "")
set(TORCHAUDIO_LIBRARY libpaddleaudio CACHE INTERNAL "")
else()
set(TORCHAUDIO_LIBRARY -Wl,--no-as-needed libtorchaudio -Wl,--as-needed CACHE INTERNAL "")
set(TORCHAUDIO_LIBRARY -Wl,--no-as-needed libpaddleaudio -Wl,--as-needed CACHE INTERNAL "")
endif()
################################################################################
@ -136,6 +145,15 @@ if(BUILD_SOX)
pybind/sox/utils.cpp
)
endif()
if(BUILD_KALDI)
list(
APPEND
EXTENSION_SOURCES
pybind/kaldi/kaldi_feature_wrapper.cc
pybind/kaldi/kaldi_feature.cc
)
endif()
#----------------------------------------------------------------------------#
# END OF CUSTOMIZATION LOGICS
#----------------------------------------------------------------------------#

@ -19,6 +19,7 @@
#include "feat/feature-window.h"
namespace paddleaudio {
namespace kaldi {
namespace py = pybind11;
@ -27,21 +28,22 @@ class StreamingFeatureTpl {
public:
typedef typename F::Options Options;
StreamingFeatureTpl(const Options& opts);
bool ComputeFeature(const kaldi::VectorBase<kaldi::BaseFloat>& wav,
kaldi::Vector<kaldi::BaseFloat>* feats);
bool ComputeFeature(const ::kaldi::VectorBase<::kaldi::BaseFloat>& wav,
::kaldi::Vector<::kaldi::BaseFloat>* feats);
void Reset() { remained_wav_.Resize(0); }
int Dim() { return computer_.Dim(); }
private:
bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves,
kaldi::Vector<kaldi::BaseFloat>* feats);
bool Compute(const ::kaldi::Vector<::kaldi::BaseFloat>& waves,
::kaldi::Vector<::kaldi::BaseFloat>* feats);
Options opts_;
kaldi::FeatureWindowFunction window_function_;
kaldi::Vector<kaldi::BaseFloat> remained_wav_;
::kaldi::FeatureWindowFunction window_function_;
::kaldi::Vector<::kaldi::BaseFloat> remained_wav_;
F computer_;
};
} // namespace kaldi
} // namespace paddleaudio
#include "feature_common_inl.h"

@ -15,6 +15,7 @@
#include "base/kaldi-common.h"
namespace paddleaudio {
namespace kaldi {
template <class F>
StreamingFeatureTpl<F>::StreamingFeatureTpl(const Options& opts)
@ -24,21 +25,21 @@ StreamingFeatureTpl<F>::StreamingFeatureTpl(const Options& opts)
template <class F>
bool StreamingFeatureTpl<F>::ComputeFeature(
const kaldi::VectorBase<kaldi::BaseFloat>& wav,
kaldi::Vector<kaldi::BaseFloat>* feats) {
const ::kaldi::VectorBase<::kaldi::BaseFloat>& wav,
::kaldi::Vector<::kaldi::BaseFloat>* feats) {
// append remained waves
kaldi::int32 wav_len = wav.Dim();
::kaldi::int32 wav_len = wav.Dim();
if (wav_len == 0) return false;
kaldi::int32 left_len = remained_wav_.Dim();
kaldi::Vector<kaldi::BaseFloat> waves(left_len + wav_len);
::kaldi::int32 left_len = remained_wav_.Dim();
::kaldi::Vector<::kaldi::BaseFloat> waves(left_len + wav_len);
waves.Range(0, left_len).CopyFromVec(remained_wav_);
waves.Range(left_len, wav_len).CopyFromVec(wav);
// cache remained waves
kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions();
kaldi::int32 num_frames = kaldi::NumFrames(waves.Dim(), frame_opts);
kaldi::int32 frame_shift = frame_opts.WindowShift();
kaldi::int32 left_samples = waves.Dim() - frame_shift * num_frames;
::kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions();
::kaldi::int32 num_frames = ::kaldi::NumFrames(waves.Dim(), frame_opts);
::kaldi::int32 frame_shift = frame_opts.WindowShift();
::kaldi::int32 left_samples = waves.Dim() - frame_shift * num_frames;
remained_wav_.Resize(left_samples);
remained_wav_.CopyFromVec(
waves.Range(frame_shift * num_frames, left_samples));
@ -51,26 +52,26 @@ bool StreamingFeatureTpl<F>::ComputeFeature(
// Compute feat
template <class F>
bool StreamingFeatureTpl<F>::Compute(
const kaldi::Vector<kaldi::BaseFloat>& waves,
kaldi::Vector<kaldi::BaseFloat>* feats) {
kaldi::BaseFloat vtln_warp = 1.0;
const kaldi::FrameExtractionOptions& frame_opts =
const ::kaldi::Vector<::kaldi::BaseFloat>& waves,
::kaldi::Vector<::kaldi::BaseFloat>* feats) {
::kaldi::BaseFloat vtln_warp = 1.0;
const ::kaldi::FrameExtractionOptions& frame_opts =
computer_.GetFrameOptions();
kaldi::int32 num_samples = waves.Dim();
kaldi::int32 frame_length = frame_opts.WindowSize();
kaldi::int32 sample_rate = frame_opts.samp_freq;
::kaldi::int32 num_samples = waves.Dim();
::kaldi::int32 frame_length = frame_opts.WindowSize();
::kaldi::int32 sample_rate = frame_opts.samp_freq;
if (num_samples < frame_length) {
return false;
}
kaldi::int32 num_frames = kaldi::NumFrames(num_samples, frame_opts);
::kaldi::int32 num_frames = ::kaldi::NumFrames(num_samples, frame_opts);
feats->Resize(num_frames * Dim());
kaldi::Vector<kaldi::BaseFloat> window;
::kaldi::Vector<::kaldi::BaseFloat> window;
bool need_raw_log_energy = computer_.NeedRawLogEnergy();
for (kaldi::int32 frame = 0; frame < num_frames; frame++) {
kaldi::BaseFloat raw_log_energy = 0.0;
kaldi::ExtractWindow(0,
for (::kaldi::int32 frame = 0; frame < num_frames; frame++) {
::kaldi::BaseFloat raw_log_energy = 0.0;
::kaldi::ExtractWindow(0,
waves,
frame,
frame_opts,
@ -78,14 +79,15 @@ bool StreamingFeatureTpl<F>::Compute(
&window,
need_raw_log_energy ? &raw_log_energy : NULL);
kaldi::Vector<kaldi::BaseFloat> this_feature(computer_.Dim(),
kaldi::kUndefined);
::kaldi::Vector<::kaldi::BaseFloat> this_feature(computer_.Dim(),
::kaldi::kUndefined);
computer_.Compute(raw_log_energy, vtln_warp, &window, &this_feature);
kaldi::SubVector<kaldi::BaseFloat> output_row(
::kaldi::SubVector<::kaldi::BaseFloat> output_row(
feats->Data() + frame * Dim(), Dim());
output_row.CopyFromVec(this_feature);
}
return true;
}
} // namespace kaldi
} // namespace paddleaudio

@ -1,10 +1,21 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h"
#include "kaldi_feature_wrapper.h"
namespace py = pybind11;
namespace paddleaudio {
namespace kaldi {
bool InitFbank(float samp_freq, // frame opts
float frame_shift_ms,
@ -32,7 +43,7 @@ bool InitFbank(float samp_freq, // frame opts
bool htk_compat,
bool use_log_fbank,
bool use_power) {
kaldi::FbankOptions opts;
::kaldi::FbankOptions opts;
opts.frame_opts.samp_freq = samp_freq; // frame opts
opts.frame_opts.frame_shift_ms = frame_shift_ms;
opts.frame_opts.frame_length_ms = frame_length_ms;
@ -61,12 +72,12 @@ bool InitFbank(float samp_freq, // frame opts
opts.htk_compat = htk_compat;
opts.use_log_fbank = use_log_fbank;
opts.use_power = use_power;
paddleaudio::KaldiFeatureWrapper::GetInstance()->InitFbank(opts);
paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->InitFbank(opts);
return true;
}
py::array_t<double> ComputeFbankStreaming(const py::array_t<double>& wav) {
return paddleaudio::KaldiFeatureWrapper::GetInstance()->ComputeFbank(wav);
return paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ComputeFbank(wav);
}
py::array_t<double> ComputeFbank(
@ -124,21 +135,14 @@ py::array_t<double> ComputeFbank(
use_log_fbank,
use_power);
py::array_t<double> result = ComputeFbankStreaming(wav);
paddleaudio::KaldiFeatureWrapper::GetInstance()->ResetFbank();
paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ResetFbank();
return result;
}
void ResetFbank() {
paddleaudio::KaldiFeatureWrapper::GetInstance()->ResetFbank();
paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ResetFbank();
}
PYBIND11_MODULE(kaldi_featurepy, m) {
m.doc() = "kaldi_feature example";
m.def("InitFbank", &InitFbank, "init fbank");
m.def("ResetFbank", &ResetFbank, "reset fbank");
m.def("ComputeFbank", &ComputeFbank, "compute fbank");
m.def("ComputeFbankStreaming",
&ComputeFbankStreaming,
"compute fbank streaming");
}
} // namespace kaldi
} // namespace paddleaudio

@ -1,10 +1,29 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include "kaldi_feature_wrapper.h"
#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h"
namespace py = pybind11;
namespace paddleaudio {
namespace kaldi {
bool InitFbank(float samp_freq, // frame opts
float frame_shift_ms,
float frame_length_ms,
@ -41,7 +60,7 @@ py::array_t<double> ComputeFbank(
bool remove_dc_offset,
std::string window_type, // e.g. Hamming window
bool round_to_power_of_two,
kaldi::BaseFloat blackman_coeff,
::kaldi::BaseFloat blackman_coeff,
bool snip_edges,
bool allow_downsample,
bool allow_upsample,
@ -68,3 +87,6 @@ void ResetFbank();
py::array_t<double> ComputeFbankStreaming(const py::array_t<double>& wav);
py::array_t<double> TestFun(const py::array_t<double>& wav);
} // namespace kaldi
} // namespace paddleaudio

@ -1,13 +1,28 @@
#include "kaldi_feature_wrapper.h"
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h"
namespace paddleaudio {
namespace kaldi {
// Returns the shared KaldiFeatureWrapper. The instance is a function-local
// static, so it is constructed on the first call and every call returns a
// pointer to that same object.
KaldiFeatureWrapper* KaldiFeatureWrapper::GetInstance() {
static KaldiFeatureWrapper instance;
return &instance;
}
bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) {
bool KaldiFeatureWrapper::InitFbank(::kaldi::FbankOptions opts) {
fbank_.reset(new Fbank(opts));
return true;
}
@ -15,7 +30,7 @@ bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) {
py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
const py::array_t<double> wav) {
py::buffer_info info = wav.request();
kaldi::Vector<kaldi::BaseFloat> input_wav(info.size);
::kaldi::Vector<::kaldi::BaseFloat> input_wav(info.size);
double* wav_ptr = (double*)info.ptr;
for (int idx = 0; idx < info.size; ++idx) {
input_wav(idx) = *wav_ptr;
@ -23,7 +38,7 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
}
kaldi::Vector<kaldi::BaseFloat> feats;
::kaldi::Vector<::kaldi::BaseFloat> feats;
bool flag = fbank_->ComputeFeature(input_wav, &feats);
if (flag == false || feats.Dim() == 0) return py::array_t<double>();
auto result = py::array_t<double>(feats.Dim());
@ -44,8 +59,8 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
py::buffer_info info = wav.request();
std::cout << info.size << std::endl;
auto result = py::array_t<double>(info.size);
//kaldi::Vector<kaldi::BaseFloat> input_wav(info.size);
kaldi::Vector<double> input_wav(info.size);
//::kaldi::Vector<::kaldi::BaseFloat> input_wav(info.size);
::kaldi::Vector<double> input_wav(info.size);
py::buffer_info info_re = result.request();
memcpy(input_wav.Data(), (double*)info.ptr, wav.nbytes());
@ -55,5 +70,5 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
*/
}
} // namespace kaldi
} // namespace paddleaudio

@ -0,0 +1,40 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "base/kaldi-common.h"
#include "feat/feature-fbank.h"
#include "paddlespeech/audio/src/pybind/kaldi/feature_common.h"
namespace paddleaudio {
namespace kaldi {
typedef StreamingFeatureTpl<::kaldi::FbankComputer> Fbank;
// Holder for a single streaming fbank extractor, reached through the shared
// instance returned by GetInstance().
//
// The extractor is only created by InitFbank(); until then fbank_ is empty,
// so the inline accessors below guard against dereferencing it.
class KaldiFeatureWrapper {
  public:
    // Returns the shared wrapper instance.
    static KaldiFeatureWrapper* GetInstance();

    // (Re)creates the fbank extractor from `opts`.
    bool InitFbank(::kaldi::FbankOptions opts);

    // Runs the extractor on `wav` and returns the resulting features.
    py::array_t<double> ComputeFbank(const py::array_t<double> wav);

    // Feature dimension reported by the extractor, or 0 when InitFbank() has
    // not been called yet (previously a null dereference).
    int Dim() { return fbank_ ? fbank_->Dim() : 0; }

    // Calls Reset() on the extractor; a no-op when InitFbank() has not been
    // called yet (previously a null dereference).
    void ResetFbank() {
        if (fbank_) fbank_->Reset();
    }

  private:
    // Empty until InitFbank() succeeds.
    std::unique_ptr<paddleaudio::kaldi::Fbank> fbank_;
};
} // namespace kaldi
} // namespace paddleaudio

@ -1,13 +0,0 @@
# Library holding the kaldi fbank wrapper sources.
add_library(kaldi_feature
  kaldi_feature.cc
  kaldi_feature_wrapper.cc
)
# Target-scoped replacement for the former directory-wide
# include_directories(): PUBLIC so that kaldi_frontend, which links
# kaldi_feature below, still sees this directory on its include path.
target_include_directories(kaldi_feature PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
# Explicit PUBLIC keeps kaldi-fbank visible to consumers, matching the
# transitive behaviour of the keyword-less signature used before.
target_link_libraries(kaldi_feature PUBLIC kaldi-fbank)

# Python extension module built from the same sources.
# NOTE(review): the module target is named kaldi_frontend; confirm this matches
# the name passed to PYBIND11_MODULE in kaldi_feature.cc, since pybind11
# requires the two to agree for the import to succeed.
pybind11_add_module(kaldi_frontend kaldi_feature.cc kaldi_feature_wrapper.cc)
target_link_libraries(kaldi_frontend PRIVATE kaldi_feature)

@ -1,24 +0,0 @@
#include "base/kaldi-common.h"
#include "feat/feature-fbank.h"
#include "feature_common.h"
#pragma once
namespace paddleaudio {
typedef StreamingFeatureTpl<kaldi::FbankComputer> Fbank;
// Holder for a single streaming fbank extractor, reached through the shared
// instance returned by GetInstance().
//
// The extractor is only created by InitFbank(); until then fbank_ is empty,
// so the inline accessors below guard against dereferencing it.
class KaldiFeatureWrapper {
  public:
    // Returns the shared wrapper instance.
    static KaldiFeatureWrapper* GetInstance();

    // (Re)creates the fbank extractor from `opts`.
    bool InitFbank(kaldi::FbankOptions opts);

    // Runs the extractor on `wav` and returns the resulting features.
    py::array_t<double> ComputeFbank(const py::array_t<double> wav);

    // Feature dimension reported by the extractor, or 0 when InitFbank() has
    // not been called yet (previously a null dereference).
    int Dim() { return fbank_ ? fbank_->Dim() : 0; }

    // Calls Reset() on the extractor; a no-op when InitFbank() has not been
    // called yet (previously a null dereference).
    void ResetFbank() {
        if (fbank_) fbank_->Reset();
    }

  private:
    // Empty until InitFbank() succeeds.
    std::unique_ptr<paddleaudio::Fbank> fbank_;
};
} // namespace paddleaudio

@ -1,8 +1,10 @@
// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
// All rights reserved.
// Copyright (c) 2017 Facebook Inc. (Soumith Chintala), All rights reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#include "paddlespeech/audio/src/pybind/sox/io.h"
#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h"
// Sox
PYBIND11_MODULE(_paddleaudio, m) {
m.def("get_info_file",
&paddleaudio::sox_io::get_info_file,
@ -10,4 +12,11 @@ PYBIND11_MODULE(_paddleaudio, m) {
m.def("get_info_fileobj",
&paddleaudio::sox_io::get_info_fileobj,
"Get metadata of audio in file object.");
m.def("InitFbank", &paddleaudio::kaldi::InitFbank, "init fbank");
m.def("ResetFbank", &paddleaudio::kaldi::ResetFbank, "reset fbank");
m.def("ComputeFbank", &paddleaudio::kaldi::ComputeFbank, "compute fbank");
m.def("ComputeFbankStreaming",
&paddleaudio::kaldi::ComputeFbankStreaming,
"compute fbank streaming");
}

@ -1,8 +1,7 @@
// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
// All rights reserved.
#ifndef PADDLEAUDIO_PYBIND_SOX_UTILS_H
#define PADDLEAUDIO_PYBIND_SOX_UTILS_H
#pragma once
#include <pybind11/pybind11.h>
#include <sox.h>
@ -41,5 +40,3 @@ std::string get_encoding(sox_encoding_t encoding);
} // namespace paddleaudio
} // namespace sox_utils
#endif

@ -1,11 +1,15 @@
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
################################################################################
# sox
################################################################################
if (BUILD_SOX)
add_subdirectory(sox)
endif()
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/kaldi
)
add_subdirectory(kaldi)
################################################################################
# kaldi
################################################################################
if (BUILD_KALDI)
add_subdirectory(kaldi)
endif()

@ -1,14 +1,54 @@
project(kaldi)
# checkout the thirdparty/kaldi/base/kaldi-types.h
# compile kaldi without openfst
add_definitions("-DCOMPILE_WITHOUT_OPENFST")
add_library(kaldi-base
# function (define_library name source include_dirs link_libraries compile_defs)
# add_library(${name} INTERFACE ${source})
# target_include_directories(${name} INTERFACE ${include_dirs})
# target_link_libraries(${name} INTERFACE ${link_libraries})
# target_compile_definitions(${name} INTERFACE ${compile_defs})
# set_target_properties(${name} PROPERTIES PREFIX "")
# if (MSVC)
# set_target_properties(${name} PROPERTIES SUFFIX ".pyd")
# endif(MSVC)
# install(
# TARGETS ${name}
# LIBRARY DESTINATION lib
# RUNTIME DESTINATION lib # For Windows
# )
# endfunction()
# kaldi-base
add_library(kaldi-base STATIC
base/io-funcs.cc
base/kaldi-error.cc
base/kaldi-math.cc
base/kaldi-utils.cc
base/timer.cc
)
target_include_directories(kaldi-base PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
add_library(kaldi-util
# kaldi-matrix
add_library(kaldi-matrix STATIC
matrix/compressed-matrix.cc
matrix/kaldi-matrix.cc
matrix/kaldi-vector.cc
matrix/matrix-functions.cc
matrix/optimization.cc
matrix/packed-matrix.cc
matrix/qr.cc
matrix/sparse-matrix.cc
matrix/sp-matrix.cc
matrix/srfft.cc
matrix/tp-matrix.cc
)
target_include_directories(kaldi-matrix PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(kaldi-matrix PUBLIC gfortran kaldi-base libopenblas)
# kaldi-util
add_library(kaldi-util STATIC
util/kaldi-holder.cc
util/kaldi-io.cc
util/kaldi-semaphore.cc
@ -19,19 +59,12 @@ add_library(kaldi-util
util/simple-options.cc
util/text-utils.cc
)
target_include_directories(kaldi-util PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(kaldi-util PUBLIC kaldi-base kaldi-matrix)
add_library(kaldi-mfcc
feat/feature-mfcc.cc
)
target_link_libraries(kaldi-mfcc PUBLIC kaldi-feat-common)
add_library(kaldi-fbank
feat/feature-fbank.cc
)
target_link_libraries(kaldi-fbank PUBLIC kaldi-feat-common)
add_library(kaldi-feat-common
# kaldi-feat-common
add_library(kaldi-feat-common STATIC
feat/wave-reader.cc
feat/signal.cc
feat/feature-functions.cc
@ -40,20 +73,44 @@ add_library(kaldi-feat-common
feat/mel-computations.cc
feat/cmvn.cc
)
target_include_directories(kaldi-feat-common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(kaldi-feat-common PUBLIC kaldi-base kaldi-matrix kaldi-util)
add_library(kaldi-matrix
matrix/compressed-matrix.cc
matrix/kaldi-matrix.cc
matrix/kaldi-vector.cc
matrix/matrix-functions.cc
matrix/optimization.cc
matrix/packed-matrix.cc
matrix/qr.cc
matrix/sparse-matrix.cc
matrix/sp-matrix.cc
matrix/srfft.cc
matrix/tp-matrix.cc
# kaldi-mfcc
add_library(kaldi-mfcc STATIC
feat/feature-mfcc.cc
)
target_link_libraries(kaldi-matrix gfortran kaldi-base libopenblas.a)
target_include_directories(kaldi-mfcc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(kaldi-mfcc PUBLIC kaldi-feat-common)
# kaldi-fbank
add_library(kaldi-fbank STATIC
feat/feature-fbank.cc
)
target_include_directories(kaldi-fbank PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(kaldi-fbank PUBLIC kaldi-feat-common)
# Archives produced by the static kaldi targets defined above.
# $<TARGET_FILE:...> resolves to each target's real output path at generate
# time, so the list stays correct under multi-config generators or a custom
# ARCHIVE_OUTPUT_DIRECTORY, instead of assuming lib<name>.a in the current
# binary directory.
set(KALDI_LIBRARIES
  $<TARGET_FILE:kaldi-base>
  $<TARGET_FILE:kaldi-matrix>
  $<TARGET_FILE:kaldi-util>
  $<TARGET_FILE:kaldi-feat-common>
  $<TARGET_FILE:kaldi-mfcc>
  $<TARGET_FILE:kaldi-fbank>
)

# Umbrella interface target: consumers link `libkaldi` and receive the include
# path, the compile definition and the whole-archive link line below.
add_library(libkaldi INTERFACE)
# The archives are linked by file path, which creates no build-order edge, so
# the dependency on the producing targets is stated explicitly.
add_dependencies(libkaldi kaldi-base kaldi-matrix kaldi-util kaldi-feat-common kaldi-mfcc kaldi-fbank)
target_include_directories(libkaldi INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(libkaldi INTERFACE
  # --whole-archive for undefined symbol when link static lib into shared lib
  -Wl,--start-group -Wl,--whole-archive
  ${KALDI_LIBRARIES}
  libopenblas
  gfortran
  -Wl,--no-whole-archive -Wl,--end-group
)
# Same definition as before; the leading -D is implied by the command.
target_compile_definitions(libkaldi INTERFACE COMPILE_WITHOUT_OPENFST)

@ -42,7 +42,7 @@ typedef float BaseFloat;
// for discussion on what to do if you need compile kaldi
// without OpenFST, see the bottom of this this file
#if (COMPILE_WITHOUT_OPENFST != 0)
#ifndef COMPILE_WITHOUT_OPENFST
#include <fst/types.h>

@ -35,7 +35,7 @@ def _get_build(var, default=False):
_BUILD_SOX = False if platform.system() == "Windows" else _get_build(
"BUILD_SOX", True)
_BUILD_MAD = _get_build("BUILD_MAD", False)
# _BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
_BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
# _BUILD_RNNT = _get_build("BUILD_RNNT", True)
# _BUILD_CTC_DECODER = False if platform.system() == "Windows" else _get_build("BUILD_CTC_DECODER", True)
# _USE_FFMPEG = _get_build("USE_FFMPEG", False)
@ -89,6 +89,7 @@ class CMakeBuild(build_ext):
f"-DCMAKE_INSTALL_PREFIX={extdir}",
"-DCMAKE_VERBOSE_MAKEFILE=ON",
f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}",
f"-DPYTHON_LIBRARY={distutils.sysconfig.get_config_var('LIBDIR')}",
f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
f"-DBUILD_MAD:BOOL={'ON' if _BUILD_MAD else 'OFF'}",
# f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",

Loading…
Cancel
Save