diff --git a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py index 7ad9bd6ec..233977bae 100644 --- a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py @@ -23,7 +23,7 @@ from typing import List import tqdm from yacs.config import CfgNode -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py index 40adf53de..49c234a43 100644 --- a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py @@ -24,7 +24,7 @@ import random import tqdm from yacs.config import CfgNode -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/paddlespeech/audio/__init__.py b/paddlespeech/audio/__init__.py index f79f3d773..0e95377ee 100644 --- a/paddlespeech/audio/__init__.py +++ b/paddlespeech/audio/__init__.py @@ -26,6 +26,7 @@ from . 
import utils from paddlespeech.audio.backends import get_audio_backend from paddlespeech.audio.backends import list_audio_backends from paddlespeech.audio.backends import set_audio_backend +from paddlespeech.audio.backends import soundfile_backend __all__ = [ "io", @@ -37,4 +38,5 @@ __all__ = [ "list_audio_backends", "get_audio_backend", "set_audio_backend", + "soundfile_backend", ] diff --git a/paddlespeech/audio/backends/soundfile_backend.py b/paddlespeech/audio/backends/soundfile_backend.py index 1afe3dc38..b3421e322 100644 --- a/paddlespeech/audio/backends/soundfile_backend.py +++ b/paddlespeech/audio/backends/soundfile_backend.py @@ -34,8 +34,9 @@ __all__ = [ 'save', 'soudfile_save', 'load', - 'soundfile_load', - 'info' + 'load_old', + 'info', + 'to_mono' ] NORMALMIZE_TYPES = ['linear', 'gaussian'] MERGE_TYPES = ['ch0', 'ch1', 'random', 'average'] @@ -122,7 +123,7 @@ def to_mono(y: np.ndarray, merge_type: str='average') -> np.ndarray: return y_out -def soundfile_load(file: os.PathLike, +def soundfile_load_(file: os.PathLike, offset: Optional[float]=None, dtype: str='int16', duration: Optional[int]=None) -> Tuple[np.ndarray, int]: @@ -204,7 +205,7 @@ def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None: wavfile.write(file, sr, y_out) -def soudfile_load( +def soundfile_load( file: os.PathLike, sr: Optional[int]=None, mono: bool=True, @@ -235,7 +236,7 @@ def soudfile_load( Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate. 
""" - y, r = sound_file_load(file, offset=offset, dtype=dtype, duration=duration) + y, r = soundfile_load_(file, offset=offset, dtype=dtype, duration=duration) if not ((y.ndim == 1 and len(y) > 0) or (y.ndim == 2 and len(y[0]) > 0)): raise ParameterError(f'audio file {file} looks empty') diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index c869e28bf..5ace7fe0d 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -25,7 +25,7 @@ import yaml from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio import load +from paddlespeech.audio.soundfile_backend import soundfile_load as load from paddlespeech.audio.features import LogMelSpectrogram __all__ = ['CLSExecutor'] diff --git a/paddlespeech/cli/kws/infer.py b/paddlespeech/cli/kws/infer.py index 111cfd754..bd15e80e6 100644 --- a/paddlespeech/cli/kws/infer.py +++ b/paddlespeech/cli/kws/infer.py @@ -24,7 +24,7 @@ import yaml from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio import load +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.audio.compliance.kaldi import fbank as kaldi_fbank __all__ = ['KWSExecutor'] diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index 7fb7b4955..aeaada85c 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -27,7 +27,7 @@ from yacs.config import CfgNode from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.modules.sid_model import SpeakerIdetification diff --git 
a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py index 3c58d61c4..93eee74b9 100644 --- a/paddlespeech/cls/exps/panns/deploy/predict.py +++ b/paddlespeech/cls/exps/panns/deploy/predict.py @@ -18,7 +18,7 @@ import numpy as np from paddle import inference from scipy.special import softmax -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.audio.datasets import ESC50 from paddlespeech.audio.features import melspectrogram @@ -42,7 +42,7 @@ def extract_features(files: str, **kwargs): srs = [] max_length = float('-inf') for file in files: - waveform, sr = load_audio(file, sr=None) + waveform, sr = load_audio(file) max_length = max(max_length, len(waveform)) waveforms.append(waveform) srs.append(sr) diff --git a/paddlespeech/server/engine/vector/python/vector_engine.py b/paddlespeech/server/engine/vector/python/vector_engine.py index 309796452..cf04d8432 100644 --- a/paddlespeech/server/engine/vector/python/vector_engine.py +++ b/paddlespeech/server/engine/vector/python/vector_engine.py @@ -17,7 +17,7 @@ from collections import OrderedDict import numpy as np import paddle -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.cli.log import logger from paddlespeech.cli.vector.infer import VectorExecutor diff --git a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py index 2d01598cd..e9203ef99 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py @@ -18,7 +18,7 @@ import time import paddle from yacs.config import CfgNode -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from 
paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import feature_normalize diff --git a/paddlespeech/vector/io/dataset.py b/paddlespeech/vector/io/dataset.py index 245b29592..c9d56b5ea 100644 --- a/paddlespeech/vector/io/dataset.py +++ b/paddlespeech/vector/io/dataset.py @@ -16,7 +16,8 @@ from dataclasses import fields from paddle.io import Dataset -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio + from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log logger = Log(__name__).getlog() diff --git a/paddlespeech/vector/io/dataset_from_json.py b/paddlespeech/vector/io/dataset_from_json.py index 12e845771..32960e456 100644 --- a/paddlespeech/vector/io/dataset_from_json.py +++ b/paddlespeech/vector/io/dataset_from_json.py @@ -17,7 +17,7 @@ from dataclasses import fields from paddle.io import Dataset -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.audio.compliance.librosa import mfcc diff --git a/tests/unit/audio/features/base.py b/tests/unit/audio/features/base.py index 6d59f72b5..4049b6102 100644 --- a/tests/unit/audio/features/base.py +++ b/tests/unit/audio/features/base.py @@ -18,7 +18,7 @@ import urllib.request import numpy as np import paddle -from paddlespeech.audio import load +from paddlespeech.audio.soundfile_backend import soundfile_load as load wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'