pull/2157/head
Hui Zhang 3 years ago
parent a7181bcde8
commit 84e5bc0382

@ -5,6 +5,7 @@ from typing import Optional
#code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py #code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py
def is_module_available(*modules: str) -> bool: def is_module_available(*modules: str) -> bool:
r"""Returns if a top-level module with :attr:`name` exists *without** r"""Returns if a top-level module with :attr:`name` exists *without**
importing it. This is generally safer than try-catch block around a importing it. This is generally safer than try-catch block around a

@ -8,21 +8,25 @@ from paddle import Tensor
#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py #code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py
def load( def load(
filepath: Union[str, Path], filepath: Union[str, Path],
out: Optional[Tensor] = None, out: Optional[Tensor]=None,
normalization: Union[bool, float, Callable] = True, normalization: Union[bool, float, Callable]=True,
channels_first: bool = True, channels_first: bool=True,
num_frames: int = 0, num_frames: int=0,
offset: int = 0, offset: int=0,
filetype: Optional[str] = None, filetype: Optional[str]=None, ) -> Tuple[Tensor, int]:
) -> Tuple[Tensor, int]:
raise RuntimeError("No audio I/O backend is available.") raise RuntimeError("No audio I/O backend is available.")
def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None: def save(filepath: str,
src: Tensor,
sample_rate: int,
precision: int=16,
channels_first: bool=True) -> None:
raise RuntimeError("No audio I/O backend is available.") raise RuntimeError("No audio I/O backend is available.")
def info(filepath: str) -> None: def info(filepath: str) -> None:
raise RuntimeError("No audio I/O backend is available.") raise RuntimeError("No audio I/O backend is available.")

@ -1,4 +1,3 @@
from pathlib import Path from pathlib import Path
from typing import Callable from typing import Callable
from typing import Optional from typing import Optional
@ -9,21 +8,25 @@ from paddle import Tensor
#https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py #https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
def load( def load(
filepath: Union[str, Path], filepath: Union[str, Path],
out: Optional[Tensor] = None, out: Optional[Tensor]=None,
normalization: Union[bool, float, Callable] = True, normalization: Union[bool, float, Callable]=True,
channels_first: bool = True, channels_first: bool=True,
num_frames: int = 0, num_frames: int=0,
offset: int = 0, offset: int=0,
filetype: Optional[str] = None, filetype: Optional[str]=None, ) -> Tuple[Tensor, int]:
) -> Tuple[Tensor, int]:
raise RuntimeError("No audio I/O backend is available.") raise RuntimeError("No audio I/O backend is available.")
def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None: def save(filepath: str,
src: Tensor,
sample_rate: int,
precision: int=16,
channels_first: bool=True) -> None:
raise RuntimeError("No audio I/O backend is available.") raise RuntimeError("No audio I/O backend is available.")
def info(filepath: str) -> None: def info(filepath: str) -> None:
raise RuntimeError("No audio I/O backend is available.") raise RuntimeError("No audio I/O backend is available.")

@ -40,7 +40,8 @@ def set_audio_backend(backend: Optional[str]):
of the system. If ``None`` is provided the current backend is unassigned. of the system. If ``None`` is provided the current backend is unassigned.
""" """
if backend is not None and backend not in list_audio_backends(): if backend is not None and backend not in list_audio_backends():
raise RuntimeError(f'Backend "{backend}" is not one of ' f"available backends: {list_audio_backends()}.") raise RuntimeError(f'Backend "{backend}" is not one of '
f"available backends: {list_audio_backends()}.")
if backend is None: if backend is None:
module = no_backend module = no_backend
@ -76,6 +77,7 @@ def _init_audio_backend():
warnings.warn("No audio backend is available.") warnings.warn("No audio backend is available.")
set_audio_backend(None) set_audio_backend(None)
def get_audio_backend() -> Optional[str]: def get_audio_backend() -> Optional[str]:
"""Get the name of the current backend """Get the name of the current backend
@ -88,4 +90,4 @@ def get_audio_backend() -> Optional[str]:
return "sox_io" return "sox_io"
if paddlespeech.audio.load == soundfile_backend.load: if paddlespeech.audio.load == soundfile_backend.load:
return "soundfile" return "soundfile"
raise ValueError("Unknown backend.") raise ValueError("Unknown backend.")

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddlespeech.audio._internal import module_utils from paddlespeech.audio._internal import module_utils
import paddlespeech.audio.ops.paddleaudio.ComputeFbank as ComputeFbank import paddlespeech.audio.ops.paddleaudio.ComputeFbank as ComputeFbank
import paddlespeech.audio.ops.paddleaudio.PitchExtractionOptions as PitchExtractionOptions import paddlespeech.audio.ops.paddleaudio.PitchExtractionOptions as PitchExtractionOptions
import paddlespeech.audio.ops.paddleaudio.FrameExtractionOptions as FrameExtractionOptions import paddlespeech.audio.ops.paddleaudio.FrameExtractionOptions as FrameExtractionOptions
@ -27,37 +27,38 @@ __all__ = [
@module_utils.requires_kaldi() @module_utils.requires_kaldi()
def fbank(wav, def fbank(
samp_freq: int=16000, wav,
frame_shift_ms: float=10.0, samp_freq: int=16000,
frame_length_ms: float=25.0, frame_shift_ms: float=10.0,
dither: float=0.0, frame_length_ms: float=25.0,
preemph_coeff: float=0.97, dither: float=0.0,
remove_dc_offset: bool=True, preemph_coeff: float=0.97,
window_type: str='povey', remove_dc_offset: bool=True,
round_to_power_of_two: bool=True, window_type: str='povey',
blackman_coeff: float=0.42, round_to_power_of_two: bool=True,
snip_edges: bool=True, blackman_coeff: float=0.42,
allow_downsample: bool=False, snip_edges: bool=True,
allow_upsample: bool=False, allow_downsample: bool=False,
max_feature_vectors: int=-1, allow_upsample: bool=False,
num_bins: int=23, max_feature_vectors: int=-1,
low_freq: float=20, num_bins: int=23,
high_freq: float=0, low_freq: float=20,
vtln_low: float=100, high_freq: float=0,
vtln_high: float=-500, vtln_low: float=100,
debug_mel: bool=False, vtln_high: float=-500,
htk_mode: bool=False, debug_mel: bool=False,
use_energy: bool=False, # fbank opts htk_mode: bool=False,
energy_floor: float=0.0, use_energy: bool=False, # fbank opts
raw_energy: bool=True, energy_floor: float=0.0,
htk_compat: bool=False, raw_energy: bool=True,
use_log_fbank: bool=True, htk_compat: bool=False,
use_power: bool=True): use_log_fbank: bool=True,
use_power: bool=True):
frame_opts = FrameExtractionOptions() frame_opts = FrameExtractionOptions()
mel_opts = MelBanksOptions() mel_opts = MelBanksOptions()
fbank_opts = FbankOptions() fbank_opts = FbankOptions()
frame_opts.samp_freq = samp_freq frame_opts.samp_freq = samp_freq
frame_opts.frame_shift_ms = frame_shift_ms frame_opts.frame_shift_ms = frame_shift_ms
frame_opts.frame_length_ms = frame_length_ms frame_opts.frame_length_ms = frame_length_ms
frame_opts.dither = dither frame_opts.dither = dither
@ -71,7 +72,7 @@ def fbank(wav,
frame_opts.allow_upsample = allow_upsample frame_opts.allow_upsample = allow_upsample
frame_opts.max_feature_vectors = max_feature_vectors frame_opts.max_feature_vectors = max_feature_vectors
mel_opts.num_bins = num_bins mel_opts.num_bins = num_bins
mel_opts.low_freq = low_freq mel_opts.low_freq = low_freq
mel_opts.high_freq = high_freq mel_opts.high_freq = high_freq
mel_opts.vtln_low = vtln_low mel_opts.vtln_low = vtln_low
@ -79,7 +80,7 @@ def fbank(wav,
mel_opts.debug_mel = debug_mel mel_opts.debug_mel = debug_mel
mel_opts.htk_mode = htk_mode mel_opts.htk_mode = htk_mode
fbank_opts.use_energy = use_energy fbank_opts.use_energy = use_energy
fbank_opts.energy_floor = energy_floor fbank_opts.energy_floor = energy_floor
fbank_opts.raw_energy = raw_energy fbank_opts.raw_energy = raw_energy
fbank_opts.htk_compat = htk_compat fbank_opts.htk_compat = htk_compat
@ -88,6 +89,7 @@ def fbank(wav,
feat = ComputeFbank(frame_opts, mel_opts, fbank_opts, wav) feat = ComputeFbank(frame_opts, mel_opts, fbank_opts, wav)
return feat return feat
@module_utils.requires_kaldi() @module_utils.requires_kaldi()
def pitch(wav, def pitch(wav,
samp_freq: int=16000, samp_freq: int=16000,
@ -114,7 +116,7 @@ def pitch(wav,
pitch_opts.samp_freq = samp_freq pitch_opts.samp_freq = samp_freq
pitch_opts.frame_shift_ms = frame_shift_ms pitch_opts.frame_shift_ms = frame_shift_ms
pitch_opts.frame_length_ms = frame_length_ms pitch_opts.frame_length_ms = frame_length_ms
pitch_opts.preemph_coeff = preemph_coeff pitch_opts.preemph_coeff = preemph_coeff
pitch_opts.min_f0 = min_f0 pitch_opts.min_f0 = min_f0
pitch_opts.max_f0 = max_f0 pitch_opts.max_f0 = max_f0
pitch_opts.soft_min_f0 = soft_min_f0 pitch_opts.soft_min_f0 = soft_min_f0

Loading…
Cancel
Save