merge audio

pull/2164/head
Yang Zhou 3 years ago
commit 38c55e44e8

@ -57,7 +57,7 @@ include(openblas)
# packages
find_package(Python3 COMPONENTS Interpreter Development)
find_package(pybind11 CONFIG)
find_package(pybind11 CONFIG REQUIRED)
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O0 -Wall -g")

@ -37,5 +37,9 @@ function (onnx_print_configuration_summary)
message(STATUS " Python executable : ${Python_EXECUTABLE}")
message(STATUS " Python includes : ${Python_INCLUDE_DIR}")
message(STATUS " Python libraries : ${Python_LIBRARY}")
message(STATUS " PYBIND11 : ${pybind11_FOUND}")
message(STATUS " Pybind11 version : ${pybind11_VERSION}")
message(STATUS " Pybind11 include : ${pybind11_INCLUDE_DIR}")
message(STATUS " Pybind11 includes : ${pybind11_INCLUDE_DIRS}")
message(STATUS " Pybind11 libraries : ${pybind11_LIBRARIES}")
endfunction()

@ -5,6 +5,7 @@ from typing import Optional
#code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py
def is_module_available(*modules: str) -> bool:
r"""Returns if a top-level module with :attr:`name` exists *without**
importing it. This is generally safer than try-catch block around a

@ -8,19 +8,23 @@ from paddle import Tensor
#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py
def load(
filepath: Union[str, Path],
out: Optional[Tensor] = None,
normalization: Union[bool, float, Callable] = True,
channels_first: bool = True,
num_frames: int = 0,
offset: int = 0,
filetype: Optional[str] = None,
) -> Tuple[Tensor, int]:
filepath: Union[str, Path],
out: Optional[Tensor]=None,
normalization: Union[bool, float, Callable]=True,
channels_first: bool=True,
num_frames: int=0,
offset: int=0,
filetype: Optional[str]=None, ) -> Tuple[Tensor, int]:
raise RuntimeError("No audio I/O backend is available.")
def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
def save(filepath: str,
src: Tensor,
sample_rate: int,
precision: int=16,
channels_first: bool=True) -> None:
raise RuntimeError("No audio I/O backend is available.")

@ -1,4 +1,3 @@
from pathlib import Path
from typing import Callable
from typing import Optional
@ -43,17 +42,20 @@ _fallback_load = _fail_load
_fallback_load_filebj = _fail_load_fileobj
def load(
filepath: Union[str, Path],
out: Optional[Tensor] = None,
normalization: Union[bool, float, Callable] = True,
channels_first: bool = True,
num_frames: int = 0,
offset: int = 0,
filetype: Optional[str] = None,
) -> Tuple[Tensor, int]:
filepath: Union[str, Path],
out: Optional[Tensor]=None,
normalization: Union[bool, float, Callable]=True,
channels_first: bool=True,
num_frames: int=0,
offset: int=0,
filetype: Optional[str]=None, ) -> Tuple[Tensor, int]:
raise RuntimeError("No audio I/O backend is available.")
def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
def save(filepath: str,
src: Tensor,
sample_rate: int,
precision: int = 16,
channels_first: bool = True) -> None:
raise RuntimeError("No audio I/O backend is available.")
@_mod_utils.requires_sox()

@ -40,7 +40,8 @@ def set_audio_backend(backend: Optional[str]):
of the system. If ``None`` is provided the current backend is unassigned.
"""
if backend is not None and backend not in list_audio_backends():
raise RuntimeError(f'Backend "{backend}" is not one of ' f"available backends: {list_audio_backends()}.")
raise RuntimeError(f'Backend "{backend}" is not one of '
f"available backends: {list_audio_backends()}.")
if backend is None:
module = no_backend
@ -76,6 +77,7 @@ def _init_audio_backend():
warnings.warn("No audio backend is available.")
set_audio_backend(None)
def get_audio_backend() -> Optional[str]:
"""Get the name of the current backend

@ -27,33 +27,34 @@ __all__ = [
@module_utils.requires_kaldi()
def fbank(wav,
samp_freq: int=16000,
frame_shift_ms: float=10.0,
frame_length_ms: float=25.0,
dither: float=0.0,
preemph_coeff: float=0.97,
remove_dc_offset: bool=True,
window_type: str='povey',
round_to_power_of_two: bool=True,
blackman_coeff: float=0.42,
snip_edges: bool=True,
allow_downsample: bool=False,
allow_upsample: bool=False,
max_feature_vectors: int=-1,
num_bins: int=23,
low_freq: float=20,
high_freq: float=0,
vtln_low: float=100,
vtln_high: float=-500,
debug_mel: bool=False,
htk_mode: bool=False,
use_energy: bool=False, # fbank opts
energy_floor: float=0.0,
raw_energy: bool=True,
htk_compat: bool=False,
use_log_fbank: bool=True,
use_power: bool=True):
def fbank(
wav,
samp_freq: int=16000,
frame_shift_ms: float=10.0,
frame_length_ms: float=25.0,
dither: float=0.0,
preemph_coeff: float=0.97,
remove_dc_offset: bool=True,
window_type: str='povey',
round_to_power_of_two: bool=True,
blackman_coeff: float=0.42,
snip_edges: bool=True,
allow_downsample: bool=False,
allow_upsample: bool=False,
max_feature_vectors: int=-1,
num_bins: int=23,
low_freq: float=20,
high_freq: float=0,
vtln_low: float=100,
vtln_high: float=-500,
debug_mel: bool=False,
htk_mode: bool=False,
use_energy: bool=False, # fbank opts
energy_floor: float=0.0,
raw_energy: bool=True,
htk_compat: bool=False,
use_log_fbank: bool=True,
use_power: bool=True):
frame_opts = FrameExtractionOptions()
mel_opts = MelBanksOptions()
fbank_opts = FbankOptions()
@ -88,6 +89,7 @@ def fbank(wav,
feat = ComputeFbank(frame_opts, mel_opts, fbank_opts, wav)
return feat
@module_utils.requires_kaldi()
def pitch(wav,
samp_freq: int=16000,

@ -105,7 +105,7 @@ function(define_extension name sources include_dirs libraries definitions)
add_library(${name} SHARED ${sources})
target_compile_definitions(${name} PRIVATE "${definitions}")
target_include_directories(
${name} PRIVATE ${PROJECT_SOURCE_DIR} ${Python_INCLUDE_DIR} ${include_dirs})
${name} PRIVATE ${PROJECT_SOURCE_DIR} ${Python_INCLUDE_DIR} ${pybind11_INCLUDE_DIR} ${include_dirs})
target_link_libraries(
${name}
${libraries}

@ -14,8 +14,8 @@
#pragma once
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include "pybind11/pybind11.h"
#include "pybind11/numpy.h"
#include "feat/feature-window.h"
namespace paddleaudio {

Loading…
Cancel
Save