parent
9762b4a361
commit
59f67db19e
@ -1,17 +1,46 @@
|
|||||||
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
|
||||||
|
|
||||||
|
# Use compiler ID "AppleClang" instead of "Clang" for XCode.
|
||||||
|
# Not setting this sometimes makes XCode C compiler gets detected as "Clang",
|
||||||
|
# even when the C++ one is detected as "AppleClang".
|
||||||
|
cmake_policy(SET CMP0010 NEW)
|
||||||
|
cmake_policy(SET CMP0025 NEW)
|
||||||
|
|
||||||
|
# Suppress warning flags in default MSVC configuration. It's not
|
||||||
|
# mandatory that we do this (and we don't if cmake is old), but it's
|
||||||
|
# nice when it's possible, and it's possible on our Windows configs.
|
||||||
|
if(NOT CMAKE_VERSION VERSION_LESS 3.15.0)
|
||||||
|
cmake_policy(SET CMP0092 NEW)
|
||||||
|
endif()
|
||||||
|
|
||||||
project(paddlespeech)
|
project(paddlespeech)
|
||||||
|
|
||||||
# check and set CMAKE_CXX_STANDARD
|
# check and set CMAKE_CXX_STANDARD
|
||||||
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
|
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
|
||||||
|
# Warn when a "-std=c++" switch was found in CMAKE_CXX_FLAGS (a match leaves
# env_cxx_standard at an index >= 0). message() joins its string arguments
# without any separator, so the first sentence carries its own trailing space.
if(env_cxx_standard GREATER -1)
  message(
    WARNING
      "C++ standard version definition detected in environment variable. "
      "paddlespeech requires -std=c++14. Please remove -std=c++ settings in your environment."
  )
endif()
|
||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 14)
|
set(CMAKE_CXX_STANDARD 14)
|
||||||
set(CMAKE_C_STANDARD 11)
|
set(CMAKE_C_STANDARD 11)
|
||||||
|
|
||||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||||
|
set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||||
|
|
||||||
|
|
||||||
# Options
|
# Options
|
||||||
option(BUILD_SOX "Build libsox statically" ON)
|
option(BUILD_SOX "Build libsox statically" ON)
|
||||||
|
option(BUILD_MAD "Enable libmad" ON)
|
||||||
|
option(BUILD_PADDLEAUDIO_PYTHON_EXTENSION "Build Python extension" ON)
|
||||||
|
|
||||||
|
# Make the project's own cmake/ directory searchable by include()/find_package().
# list(APPEND) avoids the leading empty element that string concatenation
# produces when CMAKE_MODULE_PATH is still unset.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||||
|
|
||||||
|
find_package(Python3 COMPONENTS Interpreter Development)
|
||||||
|
find_package(pybind11 CONFIG)
|
||||||
|
message(STATUS "Python_INCLUDE_DIR=" ${Python_INCLUDE_DIR})
|
||||||
|
|
||||||
add_subdirectory(paddlespeech/audio/third_party)
|
add_subdirectory(paddlespeech/audio/third_party)
|
||||||
add_subdirectory(paddlespeech/audio/src)
|
add_subdirectory(paddlespeech/audio/src)
|
||||||
|
@ -0,0 +1,47 @@
|
|||||||
|
import types
|
||||||
|
|
||||||
|
class _ClassNamespace(types.ModuleType):
|
||||||
|
def __init__(self, name):
|
||||||
|
super(_ClassNamespace, self).__init__('paddlespeech.classes' + name)
|
||||||
|
self.name = name
|
||||||
|
|
||||||
|
def __getattr__(self, attr):
|
||||||
|
proxy = None
|
||||||
|
if proxy is None:
|
||||||
|
raise RuntimeError(f'Class {self.name}.{attr} not registered!')
|
||||||
|
return proxy
|
||||||
|
|
||||||
|
class _Classes(types.ModuleType):
|
||||||
|
__file__ = '_classes.py'
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
super(_Classes, self).__init__('paddlespeech.classes')
|
||||||
|
|
||||||
|
def __getattr__(self, name):
|
||||||
|
namespace = _ClassNamespace(name)
|
||||||
|
setattr(self, name, namespace)
|
||||||
|
return namespace
|
||||||
|
|
||||||
|
@property
|
||||||
|
def loaded_libraries(self):
|
||||||
|
return paddlespeech.ops.loaded_libraries
|
||||||
|
|
||||||
|
def load_library(self, path):
|
||||||
|
"""
|
||||||
|
Loads a shared library from the given path into the current process.
|
||||||
|
The library being loaded may run global initialization code to register
|
||||||
|
custom classes with the PyTorch JIT runtime. This allows dynamically
|
||||||
|
loading custom classes. For this, you should compile your class
|
||||||
|
and the static registration code into a shared library object, and then
|
||||||
|
call ``torch.classes.load_library('path/to/libcustom.so')`` to load the
|
||||||
|
shared object.
|
||||||
|
After the library is loaded, it is added to the
|
||||||
|
``torch.classes.loaded_libraries`` attribute, a set that may be inspected
|
||||||
|
for the paths of all libraries loaded using this function.
|
||||||
|
Args:
|
||||||
|
path (str): A path to a shared library to load.
|
||||||
|
"""
|
||||||
|
paddlespeech.ops.load_library(path)
|
||||||
|
|
||||||
|
# The classes "namespace"
|
||||||
|
classes = _Classes()
|
@ -0,0 +1,98 @@
|
|||||||
|
import os
|
||||||
|
import warnings
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from ._internal import module_utils as _mod_utils # noqa: F401
|
||||||
|
|
||||||
|
_LIB_DIR = Path(__file__) / "lib"
|
||||||
|
|
||||||
|
|
||||||
|
def _get_lib_path(lib: str):
    """Build the path of the shared library ``lib`` inside ``_LIB_DIR``.

    The file extension is ``pyd`` when ``os.name`` is ``"nt"`` and ``so``
    otherwise. The path is returned whether or not the file exists.
    """
    extension = "so"
    if os.name == "nt":
        extension = "pyd"
    return _LIB_DIR / f"{lib}.{extension}"
|
||||||
|
|
||||||
|
|
||||||
|
def _load_lib(lib: str) -> bool:
    """Load extension module

    Note:
        In case `paddleaudio` is deployed with `pex` format, the library file
        is not in a standard location.
        In this case, we expect that `libpaddleaudio` is available somewhere
        in the search path of dynamic loading mechanism, so that importing
        `_paddleaudio` will have library loader find and load `libpaddleaudio`.
        This is the reason why the function should not raise an error when the
        library file is not found.

    Returns:
        bool:
            True if the library file is found AND the library loaded without failure.
            False if the library file is not found (like in the case where paddleaudio
            is deployed with pex format, thus the shared library file is
            in a non-standard location.).
            If the library file is found but there is an issue loading the library,
            (such as missing dependency) then this function raises the exception as-is.

    Raises:
        Exception:
            If the library file is found, but there is an issue loading the library file,
            (when underlying `ctypes.CDLL` throws an exception), this function will pass
            the exception as-is, instead of catching it and returning bool.
            The expected case is `OSError` thrown by `ctypes.CDLL` when a dynamic
            dependency is not found.
            This behavior was chosen because the expected failure case is not recoverable.
            If a dependency is missing, then users have to install it.
    """
    path = _get_lib_path(lib)
    if not path.exists():
        return False
    # `paddlespeech` is not imported at module level in this file; bind it
    # here so the call below does not fail with NameError.
    # NOTE(review): assumes the package exposes `ops` - confirm.
    import paddlespeech

    paddlespeech.ops.load_library(path)
    return True
|
||||||
|
|
||||||
|
|
||||||
|
_FFMPEG_INITIALIZED = False
|
||||||
|
|
||||||
|
|
||||||
|
def _init_ffmpeg():
    """Initialize the FFmpeg integration once per process.

    Steps, in order: check that the extension reports FFmpeg support, load
    the FFmpeg shared library, import the FFmpeg binding module, run the
    extension's FFmpeg initializer and lower the FFmpeg log level to 8 when
    it is higher. Later calls return immediately.

    Raises:
        RuntimeError: If the extension reports that FFmpeg is not available.
        ImportError: If loading the FFmpeg shared library raises ``OSError``.
    """
    global _FFMPEG_INITIALIZED
    if _FFMPEG_INITIALIZED:
        return

    # `paddlespeech` is not imported at module level in this file.
    # NOTE(review): assumes `paddlespeech.ops.paddleaudio` resolves at
    # runtime, as the other helpers in this package also assume - confirm.
    import paddlespeech

    if not paddlespeech.ops.paddleaudio.is_ffmpeg_available():
        raise RuntimeError(
            "paddleaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling paddleaudio."
        )

    try:
        # The return value is ignored on purpose: a missing file is
        # tolerated, only a failing load is reported.
        _load_lib("libpaddleaudio_ffmpeg")
    except OSError as err:
        raise ImportError("FFmpeg libraries are not found. Please install FFmpeg.") from err

    # Importing the binding module registers the FFmpeg bindings.
    import paddlespeech._paddleaudio_ffmpeg  # noqa

    paddlespeech.ops.paddleaudio.ffmpeg_init()
    if paddlespeech.ops.paddleaudio.ffmpeg_get_log_level() > 8:
        paddlespeech.ops.paddleaudio.ffmpeg_set_log_level(8)

    _FFMPEG_INITIALIZED = True
|
||||||
|
|
||||||
|
|
||||||
|
def _init_extension():
    """Load the C++ extension when it is present, else warn and return.

    The base shared library is loaded first, then the binding module is
    imported, and finally FFmpeg initialization is attempted on a
    best-effort basis.
    """
    extension_present = _mod_utils.is_module_available("paddlespeech._paddleaudio")
    if not extension_present:
        warnings.warn("paddlespeech C++ extension is not available.")
        return

    _load_lib("libpaddleaudio")
    # Importing the binding module initializes the methods registered via
    # PyBind11; it must come after the base library has been loaded.
    from paddlespeech import _paddleaudio  # noqa

    # This function runs while the package is being imported, so an FFmpeg
    # initialization failure is deliberately ignored here. A detailed error
    # is raised later, when client code asks for the FFmpeg feature itself.
    try:
        _init_ffmpeg()
    except Exception:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
_init_extension()
|
@ -0,0 +1,142 @@
|
|||||||
|
import importlib.util
|
||||||
|
import warnings
|
||||||
|
from functools import wraps
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
def is_module_available(*modules: str) -> bool:
    r"""Returns if every module in :attr:`modules` exists **without**
    importing it. This is generally safer than try-catch block around a
    `import X`. It avoids third party libraries breaking assumptions of some of
    our tests, e.g., setting multiprocessing start method when imported
    (see librosa/#747, torchvision/#544).

    For a dotted name, ``importlib.util.find_spec`` imports the parent
    package and raises ``ModuleNotFoundError`` when that parent is missing;
    such a module is reported as not available instead of raising.

    Args:
        *modules (str): Module names to look up.

    Returns:
        bool: True when all names resolve (also when no name is given).
    """
    for name in modules:
        try:
            if importlib.util.find_spec(name) is None:
                return False
        except ModuleNotFoundError:
            # Parent package of a dotted name does not exist.
            return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def requires_module(*modules: str):
    """Build a decorator that guards a function behind optional modules.

    Availability is checked once, when this factory is called. If every
    module is available the decorator returns the function untouched.
    Otherwise the decorated function is replaced by a stub that raises
    ``RuntimeError`` naming the missing module(s), which is clearer than a
    ``NameError`` at some random place.
    """
    unavailable = [name for name in modules if not is_module_available(name)]

    def passthrough(func):
        # Nothing is missing, so there is no need to wrap.
        return func

    if not unavailable:
        return passthrough

    if len(unavailable) == 1:
        req = f"module: {unavailable[0]}"
    else:
        req = f"modules: {unavailable}"

    def blocking(func):
        @wraps(func)
        def wrapped(*args, **kwargs):
            raise RuntimeError(f"{func.__module__}.{func.__name__} requires {req}")

        return wrapped

    return blocking
|
||||||
|
|
||||||
|
|
||||||
|
def deprecated(direction: str, version: Optional[str] = None):
    """Build a decorator that emits a deprecation warning on every call.

    Args:
        direction (str): Migration steps to be given to users.
        version (str or int): The version when the object will be removed.
            ``None`` is rendered as "future".
    """

    def decorator(func):
        @wraps(func)
        def wrapped(*args, **kwargs):
            release = "future" if version is None else version
            parts = [
                f"{func.__module__}.{func.__name__} has been deprecated ",
                f"and will be removed from {release} release. ",
                f"{direction}",
            ]
            # stacklevel=2 attributes the warning to the caller of the
            # decorated function.
            warnings.warn("".join(parts), stacklevel=2)
            return func(*args, **kwargs)

        return wrapped

    return decorator
|
||||||
|
|
||||||
|
|
||||||
|
def is_kaldi_available():
    """Report whether the C++ extension exists and says Kaldi is available.

    Returns False without touching the extension when the
    ``paddlespeech._paddleaudio`` module cannot be found.
    """
    if not is_module_available("paddlespeech._paddleaudio"):
        return False
    # `paddlespeech` is not imported at module level in this file.
    # NOTE(review): assumes `paddlespeech.ops.paddleaudio` resolves at
    # runtime - confirm.
    import paddlespeech

    return paddlespeech.ops.paddleaudio.is_kaldi_available()
|
||||||
|
|
||||||
|
|
||||||
|
def requires_kaldi():
    """Build a decorator that guards a function behind Kaldi support.

    Availability is checked once, when this factory is called. With Kaldi
    available the function is returned untouched; otherwise it is replaced
    by a stub that raises ``RuntimeError``.
    """

    def passthrough(func):
        return func

    def unavailable(func):
        @wraps(func)
        def wrapped(*args, **kwargs):
            raise RuntimeError(f"{func.__module__}.{func.__name__} requires kaldi")

        return wrapped

    return passthrough if is_kaldi_available() else unavailable
|
||||||
|
|
||||||
|
|
||||||
|
def _check_soundfile_importable():
    """Return True only when ``soundfile`` is found and imports cleanly.

    A module that is found but fails during import triggers a warning and
    yields False.
    """
    if is_module_available("soundfile"):
        try:
            import soundfile  # noqa: F401
        except Exception:
            warnings.warn("Failed to import soundfile. 'soundfile' backend is not available.")
            return False
        return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
_is_soundfile_importable = _check_soundfile_importable()
|
||||||
|
|
||||||
|
|
||||||
|
def is_soundfile_available():
    """Return the module-level ``_is_soundfile_importable`` flag.

    The flag is computed once at import time by
    ``_check_soundfile_importable()``; this accessor performs no new check.
    """
    return _is_soundfile_importable
|
||||||
|
|
||||||
|
|
||||||
|
def requires_soundfile():
    """Build a decorator that guards a function behind ``soundfile``.

    Availability is read once, when this factory is called. With soundfile
    available the function is returned untouched; otherwise it is replaced
    by a stub that raises ``RuntimeError``.
    """

    def passthrough(func):
        return func

    def unavailable(func):
        @wraps(func)
        def wrapped(*args, **kwargs):
            raise RuntimeError(f"{func.__module__}.{func.__name__} requires soundfile")

        return wrapped

    return passthrough if is_soundfile_available() else unavailable
|
||||||
|
|
||||||
|
|
||||||
|
def is_sox_available():
    """Report whether the C++ extension exists and says sox is available.

    Returns False without touching the extension when the
    ``paddlespeech._paddleaudio`` module cannot be found.
    """
    if not is_module_available("paddlespeech._paddleaudio"):
        return False
    # `paddlespeech` is not imported at module level in this file.
    # NOTE(review): assumes `paddlespeech.ops.paddleaudio` resolves at
    # runtime - confirm.
    import paddlespeech

    return paddlespeech.ops.paddleaudio.is_sox_available()
|
||||||
|
|
||||||
|
|
||||||
|
def requires_sox():
    """Build a decorator that guards a function behind sox support.

    Availability is checked once, when this factory is called. With sox
    available the function is returned untouched; otherwise it is replaced
    by a stub that raises ``RuntimeError``.
    """
    if is_sox_available():

        def decorator(func):
            return func

    else:

        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
                raise RuntimeError(f"{func.__module__}.{func.__name__} requires sox")

            return wrapped

    # The decorator itself must be returned; a bare `return` would hand back
    # None and make `@requires_sox()` fail at decoration time.
    return decorator
|
@ -0,0 +1,62 @@
|
|||||||
|
import contextlib
|
||||||
|
import ctypes
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import types
|
||||||
|
|
||||||
|
# Query `hasattr` only once.
_SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys, 'setdlopenflags')


@contextlib.contextmanager
def dl_open_guard():
    """
    # https://manpages.debian.org/bullseye/manpages-dev/dlopen.3.en.html
    Context manager to set the RTLD_GLOBAL dynamic linker flag while we open a
    shared library to load custom operators.

    The previous flags are restored when the block exits, including when the
    block raises. On interpreters without ``sys.getdlopenflags`` /
    ``sys.setdlopenflags`` the manager does nothing.
    """
    if not _SET_GLOBAL_FLAGS:
        yield
        return

    old_flags = sys.getdlopenflags()
    sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL)
    try:
        yield
    finally:
        # Without `finally`, an exception from the guarded block (e.g. a
        # failing library load) would leave RTLD_GLOBAL set for the process.
        sys.setdlopenflags(old_flags)
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_library_path(path: str) -> str:
    """Return ``os.path.realpath(path)``, i.e. the canonical form of ``path``.

    No check is made here that the path exists.
    """
    return os.path.realpath(path)
|
||||||
|
|
||||||
|
|
||||||
|
class _Ops(types.ModuleType):
|
||||||
|
__file__ = '_ops.py'
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
super(_Ops, self).__init__('paddlespeech.ops')
|
||||||
|
self.loaded_libraries = set()
|
||||||
|
|
||||||
|
def load_library(self, path):
|
||||||
|
"""
|
||||||
|
Loads a shared library from the given path into the current process.
|
||||||
|
This allows dynamically loading custom operators. For this,
|
||||||
|
you should compile your operator and
|
||||||
|
the static registration code into a shared library object, and then
|
||||||
|
call ``paddlespeech.ops.load_library('path/to/libcustom.so')`` to load the
|
||||||
|
shared object.
|
||||||
|
After the library is loaded, it is added to the
|
||||||
|
``paddlespeech.ops.loaded_libraries`` attribute, a set that may be inspected
|
||||||
|
for the paths of all libraries loaded using this function.
|
||||||
|
Args:
|
||||||
|
path (str): A path to a shared library to load.
|
||||||
|
"""
|
||||||
|
path = resolve_library_path(path)
|
||||||
|
with dl_open_guard():
|
||||||
|
# https://docs.python.org/3/library/ctypes.html?highlight=ctypes#loading-shared-libraries
|
||||||
|
# Import the shared library into the process, thus running its
|
||||||
|
# static (global) initialization code in order to register custom
|
||||||
|
# operators with the JIT.
|
||||||
|
ctypes.CDLL(path)
|
||||||
|
self.loaded_libraries.add(path)
|
||||||
|
|
||||||
|
|
||||||
|
# The ops "namespace"
|
||||||
|
ops = _Ops()
|
@ -1,36 +1,177 @@
|
|||||||
find_package(Python3 COMPONENTS Interpreter Development)
|
if (MSVC)
|
||||||
find_package(pybind11 CONFIG)
|
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# libpaddleaudio
|
||||||
|
################################################################################
|
||||||
|
set(
|
||||||
|
LIBPADDLEAUDIO_SOURCES
|
||||||
|
utils.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
set(
|
||||||
|
LIBPADDLEAUDIO_INCLUDE_DIRS
|
||||||
|
${PROJECT_SOURCE_DIR}
|
||||||
|
)
|
||||||
|
|
||||||
|
set(
|
||||||
|
LIBPADDLEAUDIO_LINK_LIBRARIES
|
||||||
|
)
|
||||||
|
|
||||||
|
set(
|
||||||
|
LIBPADDLEAUDIO_COMPILE_DEFINITIONS)
|
||||||
|
|
||||||
function(define_extension name sources libraries)
|
#------------------------------------------------------------------------------#
|
||||||
|
# START OF CUSTOMIZATION LOGICS
|
||||||
|
#------------------------------------------------------------------------------#
|
||||||
|
|
||||||
|
if(BUILD_SOX)
|
||||||
|
list(
|
||||||
|
APPEND
|
||||||
|
LIBPADDLEAUDIO_LINK_LIBRARIES
|
||||||
|
libsox
|
||||||
|
)
|
||||||
|
list(
|
||||||
|
APPEND
|
||||||
|
LIBPADDLEAUDIO_SOURCES
|
||||||
|
# sox/io.cpp
|
||||||
|
# sox/utils.cpp
|
||||||
|
# sox/effects.cpp
|
||||||
|
# sox/effects_chain.cpp
|
||||||
|
# sox/types.cpp
|
||||||
|
)
|
||||||
|
list(
|
||||||
|
APPEND
|
||||||
|
LIBPADDLEAUDIO_COMPILE_DEFINITIONS
|
||||||
|
INCLUDE_SOX
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
|
||||||
|
#------------------------------------------------------------------------------#
|
||||||
|
# END OF CUSTOMIZATION LOGICS
|
||||||
|
#------------------------------------------------------------------------------#
|
||||||
|
|
||||||
|
# define_library(<name> <source> <include_dirs> <link_libraries> <compile_defs>)
#
# Creates the SHARED library target <name> and installs it under "lib".
#   source         - list of source files
#   include_dirs   - list of PRIVATE include directories
#   link_libraries - list of libraries/targets to link
#   compile_defs   - list of PRIVATE compile definitions
# Pass each list as one quoted argument, e.g. "${MY_SOURCES}".
function(define_library name source include_dirs link_libraries compile_defs)
  add_library(${name} SHARED ${source})
  target_include_directories(${name} PRIVATE ${include_dirs})
  # PUBLIC keeps the transitive linking that the keyword-less signature had.
  target_link_libraries(${name} PUBLIC ${link_libraries})
  target_compile_definitions(${name} PRIVATE ${compile_defs})
  # Drop the platform "lib" prefix; callers already spell it in <name>.
  set_target_properties(${name} PROPERTIES PREFIX "")
  if(MSVC)
    set_target_properties(${name} PROPERTIES SUFFIX ".pyd")
  endif()
  install(
    TARGETS ${name}
    LIBRARY DESTINATION lib
    RUNTIME DESTINATION lib # For Windows
  )
endfunction()
|
||||||
|
|
||||||
|
|
||||||
|
define_library(
|
||||||
|
libpaddleaudio
|
||||||
|
"${LIBPADDLEAUDIO_SOURCES}"
|
||||||
|
"${LIBPADDLEAUDIO_INCLUDE_DIRS}"
|
||||||
|
"${LIBPADDLEAUDIO_LINK_LIBRARIES}"
|
||||||
|
"${LIBPADDLEAUDIO_COMPILE_DEFINITIONS}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Stores the link items for the core library in an INTERNAL cache variable.
# Outside Apple platforms the library name is wrapped in
# -Wl,--no-as-needed / -Wl,--as-needed linker flags.
# NOTE(review): the variable name and the `libtorchaudio` item look carried
# over from torchaudio; the library defined above is `libpaddleaudio` and no
# `libtorchaudio` target is visible here - confirm which one is intended.
if (APPLE)
  set(TORCHAUDIO_LIBRARY libtorchaudio CACHE INTERNAL "")
else()
  set(TORCHAUDIO_LIBRARY -Wl,--no-as-needed libtorchaudio -Wl,--as-needed CACHE INTERNAL "")
endif()
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# _paddleaudio.so
|
||||||
|
################################################################################
|
||||||
|
if (BUILD_PADDLEAUDIO_PYTHON_EXTENSION)
|
||||||
|
if (WIN32)
|
||||||
|
find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
|
||||||
|
set(ADDITIONAL_ITEMS Python3::Python)
|
||||||
|
endif()
|
||||||
|
function(define_extension name sources include_dirs libraries definitions)
|
||||||
add_library(${name} SHARED ${sources})
|
add_library(${name} SHARED ${sources})
|
||||||
|
target_compile_definitions(${name} PRIVATE "${definitions}")
|
||||||
target_include_directories(
|
target_include_directories(
|
||||||
${name} PRIVATE ${PROJECT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${Python3_INCLUDE_DIRS} ${pybind11_INCLUDE_DIR})
|
${name} PRIVATE ${PROJECT_SOURCE_DIR} ${Python_INCLUDE_DIR} ${include_dirs})
|
||||||
target_link_libraries(${name} ${libraries})
|
target_link_libraries(
|
||||||
|
${name}
|
||||||
|
${libraries}
|
||||||
|
${TORCH_PYTHON_LIBRARY}
|
||||||
|
${ADDITIONAL_ITEMS}
|
||||||
|
)
|
||||||
set_target_properties(${name} PROPERTIES PREFIX "")
|
set_target_properties(${name} PROPERTIES PREFIX "")
|
||||||
|
if (MSVC)
|
||||||
|
set_target_properties(${name} PROPERTIES SUFFIX ".pyd")
|
||||||
|
endif(MSVC)
|
||||||
|
if (APPLE)
|
||||||
|
# https://github.com/facebookarchive/caffe2/issues/854#issuecomment-364538485
|
||||||
|
# https://github.com/pytorch/pytorch/commit/73f6715f4725a0723d8171d3131e09ac7abf0666
|
||||||
|
set_target_properties(${name} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
|
||||||
|
endif()
|
||||||
install(
|
install(
|
||||||
TARGETS ${name}
|
TARGETS ${name}
|
||||||
LIBRARY DESTINATION .
|
LIBRARY DESTINATION .
|
||||||
|
RUNTIME DESTINATION . # For Windows
|
||||||
)
|
)
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
|
set(
|
||||||
|
EXTENSION_SOURCES
|
||||||
|
pybind/pybind.cpp
|
||||||
|
)
|
||||||
|
#----------------------------------------------------------------------------#
|
||||||
|
# START OF CUSTOMIZATION LOGICS
|
||||||
|
#----------------------------------------------------------------------------#
|
||||||
if(BUILD_SOX)
|
if(BUILD_SOX)
|
||||||
set(
|
list(
|
||||||
|
APPEND
|
||||||
EXTENSION_SOURCES
|
EXTENSION_SOURCES
|
||||||
pybind/pybind.cpp
|
# pybind/sox/effects.cpp
|
||||||
|
# pybind/sox/effects_chain.cpp
|
||||||
pybind/sox/io.cpp
|
pybind/sox/io.cpp
|
||||||
pybind/sox/utils.cpp
|
pybind/sox/utils.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
set(
|
|
||||||
LINK_LIBRARIES
|
|
||||||
libsox
|
|
||||||
)
|
|
||||||
|
|
||||||
define_extension(
|
|
||||||
_paddleaudio
|
|
||||||
"${EXTENSION_SOURCES}"
|
|
||||||
"${LINK_LIBRARIES}"
|
|
||||||
)
|
|
||||||
endif()
|
endif()
|
||||||
|
#----------------------------------------------------------------------------#
|
||||||
add_subdirectory(pybind/kaldi_frontend)
|
# END OF CUSTOMIZATION LOGICS
|
||||||
|
#----------------------------------------------------------------------------#
|
||||||
|
define_extension(
|
||||||
|
_paddleaudio
|
||||||
|
"${EXTENSION_SOURCES}"
|
||||||
|
""
|
||||||
|
libpaddleaudio
|
||||||
|
"${LIBPADDLEAUDIO_COMPILE_DEFINITIONS}"
|
||||||
|
)
|
||||||
|
# if(BUILD_CTC_DECODER)
|
||||||
|
# set(
|
||||||
|
# DECODER_EXTENSION_SOURCES
|
||||||
|
# decoder/bindings/pybind.cpp
|
||||||
|
# )
|
||||||
|
# define_extension(
|
||||||
|
# _paddleaudio_decoder
|
||||||
|
# "${DECODER_EXTENSION_SOURCES}"
|
||||||
|
# ""
|
||||||
|
# "libpaddleaudio_decoder"
|
||||||
|
# "${LIBPADDLEAUDIO_DECODER_DEFINITIONS}"
|
||||||
|
# )
|
||||||
|
# endif()
|
||||||
|
# if(USE_FFMPEG)
|
||||||
|
# set(
|
||||||
|
# FFMPEG_EXTENSION_SOURCES
|
||||||
|
# ffmpeg/pybind/typedefs.cpp
|
||||||
|
# ffmpeg/pybind/pybind.cpp
|
||||||
|
# ffmpeg/pybind/stream_reader.cpp
|
||||||
|
# )
|
||||||
|
# define_extension(
|
||||||
|
# _paddleaudio_ffmpeg
|
||||||
|
# "${FFMPEG_EXTENSION_SOURCES}"
|
||||||
|
# "${FFMPEG_INCLUDE_DIRS}"
|
||||||
|
# "libpaddleaudio_ffmpeg"
|
||||||
|
# "${LIBPADDLEAUDIO_DECODER_DEFINITIONS}"
|
||||||
|
# )
|
||||||
|
# endif()
|
||||||
|
endif()
|
@ -0,0 +1,121 @@
|
|||||||
|
Creative Commons Legal Code
|
||||||
|
|
||||||
|
CC0 1.0 Universal
|
||||||
|
|
||||||
|
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
|
||||||
|
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
|
||||||
|
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
|
||||||
|
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
|
||||||
|
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
|
||||||
|
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
|
||||||
|
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
|
||||||
|
HEREUNDER.
|
||||||
|
|
||||||
|
Statement of Purpose
|
||||||
|
|
||||||
|
The laws of most jurisdictions throughout the world automatically confer
|
||||||
|
exclusive Copyright and Related Rights (defined below) upon the creator
|
||||||
|
and subsequent owner(s) (each and all, an "owner") of an original work of
|
||||||
|
authorship and/or a database (each, a "Work").
|
||||||
|
|
||||||
|
Certain owners wish to permanently relinquish those rights to a Work for
|
||||||
|
the purpose of contributing to a commons of creative, cultural and
|
||||||
|
scientific works ("Commons") that the public can reliably and without fear
|
||||||
|
of later claims of infringement build upon, modify, incorporate in other
|
||||||
|
works, reuse and redistribute as freely as possible in any form whatsoever
|
||||||
|
and for any purposes, including without limitation commercial purposes.
|
||||||
|
These owners may contribute to the Commons to promote the ideal of a free
|
||||||
|
culture and the further production of creative, cultural and scientific
|
||||||
|
works, or to gain reputation or greater distribution for their Work in
|
||||||
|
part through the use and efforts of others.
|
||||||
|
|
||||||
|
For these and/or other purposes and motivations, and without any
|
||||||
|
expectation of additional consideration or compensation, the person
|
||||||
|
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
|
||||||
|
is an owner of Copyright and Related Rights in the Work, voluntarily
|
||||||
|
elects to apply CC0 to the Work and publicly distribute the Work under its
|
||||||
|
terms, with knowledge of his or her Copyright and Related Rights in the
|
||||||
|
Work and the meaning and intended legal effect of CC0 on those rights.
|
||||||
|
|
||||||
|
1. Copyright and Related Rights. A Work made available under CC0 may be
|
||||||
|
protected by copyright and related or neighboring rights ("Copyright and
|
||||||
|
Related Rights"). Copyright and Related Rights include, but are not
|
||||||
|
limited to, the following:
|
||||||
|
|
||||||
|
i. the right to reproduce, adapt, distribute, perform, display,
|
||||||
|
communicate, and translate a Work;
|
||||||
|
ii. moral rights retained by the original author(s) and/or performer(s);
|
||||||
|
iii. publicity and privacy rights pertaining to a person's image or
|
||||||
|
likeness depicted in a Work;
|
||||||
|
iv. rights protecting against unfair competition in regards to a Work,
|
||||||
|
subject to the limitations in paragraph 4(a), below;
|
||||||
|
v. rights protecting the extraction, dissemination, use and reuse of data
|
||||||
|
in a Work;
|
||||||
|
vi. database rights (such as those arising under Directive 96/9/EC of the
|
||||||
|
European Parliament and of the Council of 11 March 1996 on the legal
|
||||||
|
protection of databases, and under any national implementation
|
||||||
|
thereof, including any amended or successor version of such
|
||||||
|
directive); and
|
||||||
|
vii. other similar, equivalent or corresponding rights throughout the
|
||||||
|
world based on applicable law or treaty, and any national
|
||||||
|
implementations thereof.
|
||||||
|
|
||||||
|
2. Waiver. To the greatest extent permitted by, but not in contravention
|
||||||
|
of, applicable law, Affirmer hereby overtly, fully, permanently,
|
||||||
|
irrevocably and unconditionally waives, abandons, and surrenders all of
|
||||||
|
Affirmer's Copyright and Related Rights and associated claims and causes
|
||||||
|
of action, whether now known or unknown (including existing as well as
|
||||||
|
future claims and causes of action), in the Work (i) in all territories
|
||||||
|
worldwide, (ii) for the maximum duration provided by applicable law or
|
||||||
|
treaty (including future time extensions), (iii) in any current or future
|
||||||
|
medium and for any number of copies, and (iv) for any purpose whatsoever,
|
||||||
|
including without limitation commercial, advertising or promotional
|
||||||
|
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
|
||||||
|
member of the public at large and to the detriment of Affirmer's heirs and
|
||||||
|
successors, fully intending that such Waiver shall not be subject to
|
||||||
|
revocation, rescission, cancellation, termination, or any other legal or
|
||||||
|
equitable action to disrupt the quiet enjoyment of the Work by the public
|
||||||
|
as contemplated by Affirmer's express Statement of Purpose.
|
||||||
|
|
||||||
|
3. Public License Fallback. Should any part of the Waiver for any reason
|
||||||
|
be judged legally invalid or ineffective under applicable law, then the
|
||||||
|
Waiver shall be preserved to the maximum extent permitted taking into
|
||||||
|
account Affirmer's express Statement of Purpose. In addition, to the
|
||||||
|
extent the Waiver is so judged Affirmer hereby grants to each affected
|
||||||
|
person a royalty-free, non transferable, non sublicensable, non exclusive,
|
||||||
|
irrevocable and unconditional license to exercise Affirmer's Copyright and
|
||||||
|
Related Rights in the Work (i) in all territories worldwide, (ii) for the
|
||||||
|
maximum duration provided by applicable law or treaty (including future
|
||||||
|
time extensions), (iii) in any current or future medium and for any number
|
||||||
|
of copies, and (iv) for any purpose whatsoever, including without
|
||||||
|
limitation commercial, advertising or promotional purposes (the
|
||||||
|
"License"). The License shall be deemed effective as of the date CC0 was
|
||||||
|
applied by Affirmer to the Work. Should any part of the License for any
|
||||||
|
reason be judged legally invalid or ineffective under applicable law, such
|
||||||
|
partial invalidity or ineffectiveness shall not invalidate the remainder
|
||||||
|
of the License, and in such case Affirmer hereby affirms that he or she
|
||||||
|
will not (i) exercise any of his or her remaining Copyright and Related
|
||||||
|
Rights in the Work or (ii) assert any associated claims and causes of
|
||||||
|
action with respect to the Work, in either case contrary to Affirmer's
|
||||||
|
express Statement of Purpose.
|
||||||
|
|
||||||
|
4. Limitations and Disclaimers.
|
||||||
|
|
||||||
|
a. No trademark or patent rights held by Affirmer are waived, abandoned,
|
||||||
|
surrendered, licensed or otherwise affected by this document.
|
||||||
|
b. Affirmer offers the Work as-is and makes no representations or
|
||||||
|
warranties of any kind concerning the Work, express, implied,
|
||||||
|
statutory or otherwise, including without limitation warranties of
|
||||||
|
title, merchantability, fitness for a particular purpose, non
|
||||||
|
infringement, or the absence of latent or other defects, accuracy, or
|
||||||
|
the present or absence of errors, whether or not discoverable, all to
|
||||||
|
the greatest extent permissible under applicable law.
|
||||||
|
c. Affirmer disclaims responsibility for clearing rights of other persons
|
||||||
|
that may apply to the Work or any use thereof, including without
|
||||||
|
limitation any person's Copyright and Related Rights in the Work.
|
||||||
|
Further, Affirmer disclaims responsibility for obtaining any necessary
|
||||||
|
consents, permissions or other rights required for any use of the
|
||||||
|
Work.
|
||||||
|
d. Affirmer understands and acknowledges that Creative Commons is not a
|
||||||
|
party to this document and has no duty or obligation with respect to
|
||||||
|
this CC0 or use of the Work.
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,146 @@
|
|||||||
|
// #include "sox/effects.h"
|
||||||
|
// #include "sox/effects_chain.h"
|
||||||
|
#include "sox/io.h"
|
||||||
|
#include "sox/types.h"
|
||||||
|
#include "sox/utils.h"
|
||||||
|
|
||||||
|
using namespace torch::indexing;
|
||||||
|
using namespace paddleaudio::sox_utils;
|
||||||
|
|
||||||
|
namespace paddleaudio {
|
||||||
|
namespace sox_io {
|
||||||
|
|
||||||
|
// Opens `path` with sox_open_read and reports its metadata.
//
// `format`, when set, is passed to sox as the file type; otherwise nullptr
// is passed. Returns an empty optional when the file could not be opened or
// its encoding is SOX_ENCODING_UNKNOWN. Otherwise returns, in order: sample
// rate, signal length divided by channel count, channel count, bits per
// sample, and the result of get_encoding() for the file's encoding.
tl::optional<MetaDataTuple> get_info_file(
    const std::string& path,
    const tl::optional<std::string>& format) {
  SoxFormat sf(sox_open_read(
      path.c_str(),
      /*signal=*/nullptr,
      /*encoding=*/nullptr,
      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));

  // A null handle means sox_open_read failed.
  if (static_cast<sox_format_t*>(sf) == nullptr ||
      sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
    return {};
  }

  // NOTE(review): the division assumes signal.channels is non-zero for a
  // successfully opened file - confirm.
  return std::forward_as_tuple(
      static_cast<int64_t>(sf->signal.rate),
      static_cast<int64_t>(sf->signal.length / sf->signal.channels),
      static_cast<int64_t>(sf->signal.channels),
      static_cast<int64_t>(sf->encoding.bits_per_sample),
      get_encoding(sf->encoding.encoding));
}
|
||||||
|
|
||||||
|
std::vector<std::vector<std::string>> get_effects(
|
||||||
|
const tl::optional<int64_t>& frame_offset,
|
||||||
|
const tl::optional<int64_t>& num_frames) {
|
||||||
|
const auto offset = frame_offset.value_or(0);
|
||||||
|
if (offset < 0) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
"Invalid argument: frame_offset must be non-negative.");
|
||||||
|
}
|
||||||
|
const auto frames = num_frames.value_or(-1);
|
||||||
|
if (frames == 0 || frames < -1) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
"Invalid argument: num_frames must be -1 or greater than 0.");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::vector<std::string>> effects;
|
||||||
|
if (frames != -1) {
|
||||||
|
std::ostringstream os_offset, os_frames;
|
||||||
|
os_offset << offset << "s";
|
||||||
|
os_frames << "+" << frames << "s";
|
||||||
|
effects.emplace_back(
|
||||||
|
std::vector<std::string>{"trim", os_offset.str(), os_frames.str()});
|
||||||
|
} else if (offset != 0) {
|
||||||
|
std::ostringstream os_offset;
|
||||||
|
os_offset << offset << "s";
|
||||||
|
effects.emplace_back(std::vector<std::string>{"trim", os_offset.str()});
|
||||||
|
}
|
||||||
|
return effects;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loads audio from `path`, optionally restricted to a range of frames.
//
// The frame range (`frame_offset`, `num_frames`) is translated into a sox
// effect list by get_effects(), which also validates both values. Everything
// else (`normalize`, `channels_first`, `format`) is forwarded unchanged to
// paddleaudio::sox_effects::apply_effects_file, whose result is returned.
tl::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
    const std::string& path,
    const tl::optional<int64_t>& frame_offset,
    const tl::optional<int64_t>& num_frames,
    tl::optional<bool> normalize,
    tl::optional<bool> channels_first,
    const tl::optional<std::string>& format) {
  auto range_effects = get_effects(frame_offset, num_frames);
  return paddleaudio::sox_effects::apply_effects_file(
      path, range_effects, normalize, channels_first, format);
}
|
||||||
|
|
||||||
|
void save_audio_file(
|
||||||
|
const std::string& path,
|
||||||
|
torch::Tensor tensor,
|
||||||
|
int64_t sample_rate,
|
||||||
|
bool channels_first,
|
||||||
|
tl::optional<double> compression,
|
||||||
|
tl::optional<std::string> format,
|
||||||
|
tl::optional<std::string> encoding,
|
||||||
|
tl::optional<int64_t> bits_per_sample) {
|
||||||
|
validate_input_tensor(tensor);
|
||||||
|
|
||||||
|
const auto filetype = [&]() {
|
||||||
|
if (format.has_value())
|
||||||
|
return format.value();
|
||||||
|
return get_filetype(path);
|
||||||
|
}();
|
||||||
|
|
||||||
|
if (filetype == "amr-nb") {
|
||||||
|
const auto num_channels = tensor.size(channels_first ? 0 : 1);
|
||||||
|
TORCH_CHECK(
|
||||||
|
num_channels == 1, "amr-nb format only supports single channel audio.");
|
||||||
|
} else if (filetype == "htk") {
|
||||||
|
const auto num_channels = tensor.size(channels_first ? 0 : 1);
|
||||||
|
TORCH_CHECK(
|
||||||
|
num_channels == 1, "htk format only supports single channel audio.");
|
||||||
|
} else if (filetype == "gsm") {
|
||||||
|
const auto num_channels = tensor.size(channels_first ? 0 : 1);
|
||||||
|
TORCH_CHECK(
|
||||||
|
num_channels == 1, "gsm format only supports single channel audio.");
|
||||||
|
TORCH_CHECK(
|
||||||
|
sample_rate == 8000,
|
||||||
|
"gsm format only supports a sampling rate of 8kHz.");
|
||||||
|
}
|
||||||
|
const auto signal_info =
|
||||||
|
get_signalinfo(&tensor, sample_rate, filetype, channels_first);
|
||||||
|
const auto encoding_info = get_encodinginfo_for_save(
|
||||||
|
filetype, tensor.dtype(), compression, encoding, bits_per_sample);
|
||||||
|
|
||||||
|
SoxFormat sf(sox_open_write(
|
||||||
|
path.c_str(),
|
||||||
|
&signal_info,
|
||||||
|
&encoding_info,
|
||||||
|
/*filetype=*/filetype.c_str(),
|
||||||
|
/*oob=*/nullptr,
|
||||||
|
/*overwrite_permitted=*/nullptr));
|
||||||
|
|
||||||
|
if (static_cast<sox_format_t*>(sf) == nullptr) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
"Error saving audio file: failed to open file " + path);
|
||||||
|
}
|
||||||
|
|
||||||
|
paddleaudio::sox_effects_chain::SoxEffectsChain chain(
|
||||||
|
/*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()),
|
||||||
|
/*output_encoding=*/sf->encoding);
|
||||||
|
chain.addInputTensor(&tensor, sample_rate, channels_first);
|
||||||
|
chain.addOutputFile(sf);
|
||||||
|
chain.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Registers the three sox I/O entry points (metadata query, load, save) under
// the `paddleaudio` library fragment.
TORCH_LIBRARY_FRAGMENT(paddleaudio, m) {
  m.def("paddleaudio::sox_io_get_info", &paddleaudio::sox_io::get_info_file);
  m.def("paddleaudio::sox_io_load_audio_file", &paddleaudio::sox_io::load_audio_file);
  m.def("paddleaudio::sox_io_save_audio_file", &paddleaudio::sox_io::save_audio_file);
}
|
||||||
|
|
||||||
|
} // namespace sox_io
|
||||||
|
} // namespace paddleaudio
|
@ -0,0 +1,47 @@
|
|||||||
|
|
||||||
|
//Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
|
||||||
|
//All rights reserved.
|
||||||
|
|
||||||
|
#ifndef PADDLEAUDIO_SOX_IO_H
|
||||||
|
#define PADDLEAUDIO_SOX_IO_H
|
||||||
|
|
||||||
|
// #include "sox/utils.h"
|
||||||
|
#include "optional/optional.hpp"
|
||||||
|
|
||||||
|
namespace paddleaudio {
|
||||||
|
namespace sox_io {
|
||||||
|
|
||||||
|
auto get_effects(
|
||||||
|
const tl::optional<int64_t>& frame_offset,
|
||||||
|
const tl::optional<int64_t>& num_frames)
|
||||||
|
-> std::vector<std::vector<std::string>>;
|
||||||
|
|
||||||
|
using MetaDataTuple =
|
||||||
|
std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;
|
||||||
|
|
||||||
|
tl::optional<MetaDataTuple> get_info_file(
|
||||||
|
const std::string& path,
|
||||||
|
const tl::optional<std::string>& format);
|
||||||
|
|
||||||
|
tl::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
|
||||||
|
const std::string& path,
|
||||||
|
const tl::optional<int64_t>& frame_offset,
|
||||||
|
const tl::optional<int64_t>& num_frames,
|
||||||
|
tl::optional<bool> normalize,
|
||||||
|
tl::optional<bool> channels_first,
|
||||||
|
const tl::optional<std::string>& format);
|
||||||
|
|
||||||
|
void save_audio_file(
|
||||||
|
const std::string& path,
|
||||||
|
torch::Tensor tensor,
|
||||||
|
int64_t sample_rate,
|
||||||
|
bool channels_first,
|
||||||
|
tl::optional<double> compression,
|
||||||
|
tl::optional<std::string> format,
|
||||||
|
tl::optional<std::string> encoding,
|
||||||
|
tl::optional<int64_t> bits_per_sample);
|
||||||
|
|
||||||
|
} // namespace sox_io
|
||||||
|
} // namespace paddleaudio
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,33 @@
|
|||||||
|
namespace paddleaudio {

namespace {

// Each constant below mirrors one preprocessor switch: it is true exactly when
// the corresponding macro was defined while compiling this file.
#ifdef INCLUDE_SOX
constexpr bool kSoxCompiledIn = true;
#else
constexpr bool kSoxCompiledIn = false;
#endif

#ifdef INCLUDE_KALDI
constexpr bool kKaldiCompiledIn = true;
#else
constexpr bool kKaldiCompiledIn = false;
#endif

#ifdef USE_FFMPEG
constexpr bool kFfmpegCompiledIn = true;
#else
constexpr bool kFfmpegCompiledIn = false;
#endif

// True when INCLUDE_SOX was defined at compile time.
bool is_sox_available() {
  return kSoxCompiledIn;
}

// True when INCLUDE_KALDI was defined at compile time.
bool is_kaldi_available() {
  return kKaldiCompiledIn;
}

// It tells whether paddleaudio was compiled with ffmpeg
// not the runtime availability.
bool is_ffmpeg_available() {
  return kFfmpegCompiledIn;
}

} // namespace

} // namespace paddleaudio
|
@ -0,0 +1 @@
|
|||||||
|
from .extension import *
|
@ -0,0 +1,144 @@
|
|||||||
|
import distutils.sysconfig
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from setuptools import Extension
|
||||||
|
from setuptools.command.build_ext import build_ext
|
||||||
|
|
||||||
|
# Names exported by this module.
__all__ = ["get_ext_modules", "CMakeBuild"]

# Directory containing this file, and the directory two levels above it, which
# is handed to CMake as the source root.
_THIS_DIR = Path(__file__).parent.resolve()
_ROOT_DIR = _THIS_DIR.parent.parent.resolve()
_PADDLESPEECH_DIR = _ROOT_DIR.joinpath("paddlespeech")
|
||||||
|
|
||||||
|
|
||||||
|
def _get_build(var, default=False):
|
||||||
|
if var not in os.environ:
|
||||||
|
return default
|
||||||
|
|
||||||
|
val = os.environ.get(var, "0")
|
||||||
|
trues = ["1", "true", "TRUE", "on", "ON", "yes", "YES"]
|
||||||
|
falses = ["0", "false", "FALSE", "off", "OFF", "no", "NO"]
|
||||||
|
if val in trues:
|
||||||
|
return True
|
||||||
|
if val not in falses:
|
||||||
|
print(f"WARNING: Unexpected environment variable value `{var}={val}`. " f"Expected one of {trues + falses}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
# Build switches, resolved once at import time from the environment.
# libsox is never built on Windows; elsewhere it is built unless BUILD_SOX
# says otherwise.
_BUILD_SOX = platform.system() != "Windows" and _get_build("BUILD_SOX", True)
# libmad is opt-in.
_BUILD_MAD = _get_build("BUILD_MAD", False)
# _BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
# _BUILD_RNNT = _get_build("BUILD_RNNT", True)
# _BUILD_CTC_DECODER = False if platform.system() == "Windows" else _get_build("BUILD_CTC_DECODER", True)
# _USE_FFMPEG = _get_build("USE_FFMPEG", False)
# _USE_ROCM = _get_build("USE_ROCM", torch.cuda.is_available() and torch.version.hip is not None)
# _USE_CUDA = _get_build("USE_CUDA", torch.cuda.is_available() and torch.version.hip is None)
# _USE_OPENMP = _get_build("USE_OPENMP", True) and "ATen parallel backend: OpenMP" in torch.__config__.parallel_info()
# Raw CUDA architecture list from the environment, or None when unset.
_PADDLESPEECH_CUDA_ARCH_LIST = os.environ.get("PADDLESPEECH_CUDA_ARCH_LIST")
|
||||||
|
|
||||||
|
|
||||||
|
def get_ext_modules():
    """Return the placeholder ``Extension`` objects handed to setuptools.

    Both entries have an empty source list. ``CMakeBuild.build_extension``
    runs CMake only for ``paddlespeech.audio._paddleaudio`` and returns
    immediately for the other entry.
    """
    extension_names = (
        "paddlespeech.audio.lib.libpaddleaudio",
        "paddlespeech.audio._paddleaudio",
    )
    return [Extension(name=ext_name, sources=[]) for ext_name in extension_names]
|
||||||
|
|
||||||
|
|
||||||
|
# Based off of
|
||||||
|
# https://github.com/pybind/cmake_example/blob/580c5fd29d4651db99d8874714b07c0c49a53f8a/setup.py
|
||||||
|
class CMakeBuild(build_ext):
    """``build_ext`` command that builds the native libraries with CMake."""

    def run(self):
        """Verify that ``cmake`` can be launched, then run the normal build.

        Raises:
            RuntimeError: if the ``cmake`` executable cannot be started.
        """
        try:
            subprocess.check_output(["cmake", "--version"])
        except OSError:
            raise RuntimeError("CMake is not available.") from None
        super().run()

    def build_extension(self, ext):
        """Configure and build the whole native project for one extension.

        Args:
            ext: The ``Extension`` being built. Only
                ``paddlespeech.audio._paddleaudio`` triggers a CMake run.

        Raises:
            subprocess.CalledProcessError: if the configure or build step
                exits with a non-zero status.
        """
        # Since two library files (libpaddleaudio and _paddleaudio) need to be
        # recognized by setuptools, we instantiate `Extension` twice. (see `get_ext_modules`)
        # This leads to the situation where this `build_extension` method is called twice.
        # However, the following `cmake` command will build all of them at the same time,
        # so, we do not need to perform `cmake` twice.
        # Therefore we call `cmake` only for `paddleaudio._paddleaudio`.
        if ext.name != "paddlespeech.audio._paddleaudio":
            return

        extdir = os.path.abspath(os.path.dirname(self.get_ext_filename(ext.name)))

        # required for auto-detection of auxiliary "native" libs
        if not extdir.endswith(os.path.sep):
            extdir += os.path.sep

        cfg = "Debug" if self.debug else "Release"

        cmake_args = [
            f"-DCMAKE_BUILD_TYPE={cfg}",
            # f"-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}",
            f"-DCMAKE_INSTALL_PREFIX={extdir}",
            "-DCMAKE_VERBOSE_MAKEFILE=ON",
            f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}",
            f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
            f"-DBUILD_MAD:BOOL={'ON' if _BUILD_MAD else 'OFF'}",
            # f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
            # f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
            # f"-DBUILD_CTC_DECODER:BOOL={'ON' if _BUILD_CTC_DECODER else 'OFF'}",
            "-DBUILD_PADDLEAUDIO_PYTHON_EXTENSION:BOOL=ON",
            # f"-DUSE_ROCM:BOOL={'ON' if _USE_ROCM else 'OFF'}",
            # f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}",
            # f"-DUSE_OPENMP:BOOL={'ON' if _USE_OPENMP else 'OFF'}",
            # f"-DUSE_FFMPEG:BOOL={'ON' if _USE_FFMPEG else 'OFF'}",
        ]
        build_args = ["--target", "install"]

        # Pass CUDA architecture to cmake
        if _PADDLESPEECH_CUDA_ARCH_LIST is not None:
            # Convert MAJOR.MINOR[+PTX] list to new style one
            # defined at https://cmake.org/cmake/help/latest/prop_tgt/CUDA_ARCHITECTURES.html
            # e.g. "7.5 8.6+PTX" becomes "75-real;86".
            _arches = _PADDLESPEECH_CUDA_ARCH_LIST.replace(".", "").replace(" ", ";").split(";")
            _arches = [arch[:-4] if arch.endswith("+PTX") else f"{arch}-real" for arch in _arches]
            cmake_args += [f"-DCMAKE_CUDA_ARCHITECTURES={';'.join(_arches)}"]

        # Default to Ninja (always on Windows, otherwise only when the user
        # has not chosen a generator through CMAKE_GENERATOR).
        if "CMAKE_GENERATOR" not in os.environ or platform.system() == "Windows":
            cmake_args += ["-GNinja"]

        if platform.system() == "Windows":
            import sys

            python_version = sys.version_info
            cmake_args += [
                "-DCMAKE_C_COMPILER=cl",
                "-DCMAKE_CXX_COMPILER=cl",
                f"-DPYTHON_VERSION={python_version.major}.{python_version.minor}",
            ]

        # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level
        # across all generators.
        if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
            # self.parallel is a Python 3 only way to set parallel jobs by hand
            # using -j in the build_ext call, not supported by pip or PyPA-build.
            if hasattr(self, "parallel") and self.parallel:
                # CMake 3.12+ only.
                build_args += ["-j{}".format(self.parallel)]

        # exist_ok avoids failing when the directory appears between a check
        # and the creation.
        os.makedirs(self.build_temp, exist_ok=True)

        print(f"cmake {_ROOT_DIR} {' '.join(cmake_args)}, cwd={self.build_temp}")
        subprocess.check_call(["cmake", str(_ROOT_DIR)] + cmake_args, cwd=self.build_temp)
        print(f"cmake --build . {' '.join(build_args)}, cwd={self.build_temp}")
        subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=self.build_temp)

    def get_ext_filename(self, fullname):
        """Return the extension file name with the ABI tag component removed.

        The name produced by the base class is split on ``.`` and the
        second-to-last component is dropped, e.g.
        ``pkg/_mod.cpython-38-x86_64-linux-gnu.so`` becomes ``pkg/_mod.so``.

        A name with fewer than three dot-separated components has no ABI tag
        to drop and is returned unchanged; stripping it anyway would discard
        the module path and leave only the file suffix.
        """
        ext_filename = super().get_ext_filename(fullname)
        parts = ext_filename.split(".")
        if len(parts) < 3:
            return ext_filename
        without_abi = parts[:-2] + parts[-1:]
        return ".".join(without_abi)
|
Loading…
Reference in new issue