diff --git a/examples/esc50/cls0/conf/panns.yaml b/examples/esc50/cls0/conf/panns.yaml index 1f0323f0d..3a9d42aa5 100644 --- a/examples/esc50/cls0/conf/panns.yaml +++ b/examples/esc50/cls0/conf/panns.yaml @@ -1,5 +1,5 @@ data: - dataset: 'paddlespeech.audio.datasets:ESC50' + dataset: 'paddleaudio.datasets:ESC50' num_classes: 50 train: mode: 'train' diff --git a/examples/hey_snips/kws0/conf/mdtc.yaml b/examples/hey_snips/kws0/conf/mdtc.yaml index 54d059472..857d36d46 100644 --- a/examples/hey_snips/kws0/conf/mdtc.yaml +++ b/examples/hey_snips/kws0/conf/mdtc.yaml @@ -2,7 +2,7 @@ ########################################### # Data # ########################################### -dataset: 'paddlespeech.audio.datasets:HeySnips' +dataset: 'paddleaudio.datasets:HeySnips' data_dir: '../tests/hey_snips_research_6k_en_train_eval_clean_ter' ############################################ diff --git a/examples/voxceleb/sv0/local/data_prepare.py b/examples/voxceleb/sv0/local/data_prepare.py index e5a5dff7b..b4486b6f0 100644 --- a/examples/voxceleb/sv0/local/data_prepare.py +++ b/examples/voxceleb/sv0/local/data_prepare.py @@ -14,9 +14,9 @@ import argparse import paddle +from paddleaudio.datasets.voxceleb import VoxCeleb from yacs.config import CfgNode -from paddlespeech.audio.datasets.voxceleb import VoxCeleb from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.training.seeding import seed_everything diff --git a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py index 233977bae..11908fe63 100644 --- a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py @@ -21,9 +21,9 @@ import os from typing import List import tqdm +from paddleaudio.backends import soundfile_load as load_audio from yacs.config import CfgNode -from 
paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py index 49c234a43..ebeb598a4 100644 --- a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py @@ -22,9 +22,9 @@ import os import random import tqdm +from paddleaudio.backends import soundfile_load as load_audio from yacs.config import CfgNode -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index 5ace7fe0d..5e9b5acec 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -25,8 +25,8 @@ import yaml from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio.soundfile_backend import soundfile_load as load -from paddlespeech.audio.features import LogMelSpectrogram +from paddleaudio.backends import soundfile_load as load +from paddleaudio.features import LogMelSpectrogram __all__ = ['CLSExecutor'] diff --git a/paddlespeech/cli/kws/infer.py b/paddlespeech/cli/kws/infer.py index bd15e80e6..ce2f3f461 100644 --- a/paddlespeech/cli/kws/infer.py +++ b/paddlespeech/cli/kws/infer.py @@ -20,12 +20,12 @@ from typing import Union import paddle import yaml +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.kaldi import fbank as kaldi_fbank from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.kaldi import fbank 
as kaldi_fbank __all__ = ['KWSExecutor'] @@ -139,7 +139,7 @@ class KWSExecutor(BaseExecutor): Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet). """ assert os.path.isfile(audio_file) - waveform, _ = load(audio_file) + waveform, _ = load_audio(audio_file) if isinstance(audio_file, (str, os.PathLike)): logger.debug("Preprocessing audio_file:" + audio_file) diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index 5a66b4861..57a781656 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -22,13 +22,13 @@ from typing import Union import paddle import soundfile +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.modules.sid_model import SpeakerIdetification diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py index 93eee74b9..ae46890bd 100644 --- a/paddlespeech/cls/exps/panns/deploy/predict.py +++ b/paddlespeech/cls/exps/panns/deploy/predict.py @@ -16,12 +16,11 @@ import os import numpy as np from paddle import inference +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.datasets import ESC50 +from paddleaudio.features import melspectrogram from scipy.special import softmax -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.datasets import ESC50 -from paddlespeech.audio.features import melspectrogram - # yapf: disable parser = argparse.ArgumentParser() parser.add_argument("--model_dir", type=str, required=True, 
default="./export", help="The directory to static model.") diff --git a/paddlespeech/cls/exps/panns/export_model.py b/paddlespeech/cls/exps/panns/export_model.py index e62d58f02..63b22981a 100644 --- a/paddlespeech/cls/exps/panns/export_model.py +++ b/paddlespeech/cls/exps/panns/export_model.py @@ -15,8 +15,8 @@ import argparse import os import paddle +from paddleaudio.datasets import ESC50 -from paddlespeech.audio.datasets import ESC50 from paddlespeech.cls.models import cnn14 from paddlespeech.cls.models import SoundClassifier diff --git a/paddlespeech/cls/exps/panns/predict.py b/paddlespeech/cls/exps/panns/predict.py index 97759a89d..feeee24e3 100644 --- a/paddlespeech/cls/exps/panns/predict.py +++ b/paddlespeech/cls/exps/panns/predict.py @@ -17,10 +17,10 @@ import os import paddle import paddle.nn.functional as F import yaml +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.features import LogMelSpectrogram +from paddleaudio.utils import logger -from paddlespeech.audio.backends import load as load_audio -from paddlespeech.audio.features import LogMelSpectrogram -from paddlespeech.audio.utils import logger from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/paddlespeech/cls/exps/panns/train.py b/paddlespeech/cls/exps/panns/train.py index fba38a01c..9258ab516 100644 --- a/paddlespeech/cls/exps/panns/train.py +++ b/paddlespeech/cls/exps/panns/train.py @@ -17,9 +17,9 @@ import os import paddle import yaml -from paddlespeech.audio.features import LogMelSpectrogram -from paddlespeech.audio.utils import logger -from paddlespeech.audio.utils import Timer +from paddleaudio.features import LogMelSpectrogram +from paddleaudio.utils import logger +from paddleaudio.utils import Timer from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/paddlespeech/cls/models/panns/panns.py 
b/paddlespeech/cls/models/panns/panns.py index 37deae80c..6f9af9b52 100644 --- a/paddlespeech/cls/models/panns/panns.py +++ b/paddlespeech/cls/models/panns/panns.py @@ -15,8 +15,8 @@ import os import paddle.nn as nn import paddle.nn.functional as F +from paddleaudio.utils.download import load_state_dict_from_url -from paddlespeech.audio.utils.download import load_state_dict_from_url from paddlespeech.utils.env import MODEL_HOME __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6'] diff --git a/paddlespeech/kws/exps/mdtc/train.py b/paddlespeech/kws/exps/mdtc/train.py index 94e45d590..5a9ca92d1 100644 --- a/paddlespeech/kws/exps/mdtc/train.py +++ b/paddlespeech/kws/exps/mdtc/train.py @@ -14,10 +14,10 @@ import os import paddle +from paddleaudio.utils import logger +from paddleaudio.utils import Timer from yacs.config import CfgNode -from paddlespeech.audio.utils import logger -from paddlespeech.audio.utils import Timer from paddlespeech.kws.exps.mdtc.collate import collate_features from paddlespeech.kws.models.loss import max_pooling_loss from paddlespeech.kws.models.mdtc import KWSModel diff --git a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py index ac5720fd5..22329d5e0 100644 --- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py +++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py @@ -14,11 +14,10 @@ """Contains the audio featurizer class.""" import numpy as np import paddle +import paddleaudio.compliance.kaldi as kaldi from python_speech_features import delta from python_speech_features import mfcc -import paddlespeech.audio.compliance.kaldi as kaldi - class AudioFeaturizer(): """Audio featurizer, for extracting features from audio contents of diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py index 8a9849492..80f187282 100644 --- a/paddlespeech/s2t/models/u2/u2.py +++ b/paddlespeech/s2t/models/u2/u2.py @@ -28,10 +28,10 @@ from typing 
import Tuple import paddle from paddle import jit from paddle import nn +from paddleaudio.utils.tensor_utils import add_sos_eos +from paddleaudio.utils.tensor_utils import pad_sequence +from paddleaudio.utils.tensor_utils import th_accuracy -from paddlespeech.audio.utils.tensor_utils import add_sos_eos -from paddlespeech.audio.utils.tensor_utils import pad_sequence -from paddlespeech.audio.utils.tensor_utils import th_accuracy from paddlespeech.s2t.decoders.scorers.ctc import CTCPrefixScorer from paddlespeech.s2t.frontend.utility import IGNORE_ID from paddlespeech.s2t.frontend.utility import load_cmvn diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py index e8b61bc0d..4b68c1ae1 100644 --- a/paddlespeech/s2t/models/u2_st/u2_st.py +++ b/paddlespeech/s2t/models/u2_st/u2_st.py @@ -24,9 +24,9 @@ from typing import Tuple import paddle from paddle import jit from paddle import nn +from paddleaudio.utils.tensor_utils import add_sos_eos +from paddleaudio.utils.tensor_utils import th_accuracy -from paddlespeech.audio.utils.tensor_utils import add_sos_eos -from paddlespeech.audio.utils.tensor_utils import th_accuracy from paddlespeech.s2t.frontend.utility import IGNORE_ID from paddlespeech.s2t.frontend.utility import load_cmvn from paddlespeech.s2t.modules.cmvn import GlobalCMVN diff --git a/paddlespeech/server/engine/vector/python/vector_engine.py b/paddlespeech/server/engine/vector/python/vector_engine.py index e617c3650..7d86f3df7 100644 --- a/paddlespeech/server/engine/vector/python/vector_engine.py +++ b/paddlespeech/server/engine/vector/python/vector_engine.py @@ -16,9 +16,9 @@ from collections import OrderedDict import numpy as np import paddle +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.librosa import melspectrogram from 
paddlespeech.cli.log import logger from paddlespeech.cli.vector.infer import VectorExecutor from paddlespeech.server.engine.base_engine import BaseEngine diff --git a/paddlespeech/server/util.py b/paddlespeech/server/util.py index 32546a330..6aa6fd589 100644 --- a/paddlespeech/server/util.py +++ b/paddlespeech/server/util.py @@ -24,11 +24,11 @@ from typing import Any from typing import Dict import paddle +import paddleaudio import requests import yaml from paddle.framework import load -import paddlespeech.audio from .entry import client_commands from .entry import server_commands from paddlespeech.cli import download @@ -289,7 +289,7 @@ def _note_one_stat(cls_name, params={}): if 'audio_file' in params: try: - _, sr = paddlespeech.audio.load(params['audio_file']) + _, sr = paddleaudio.backends.soundfile_load(params['audio_file']) except Exception: sr = -1 diff --git a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py index e9203ef99..821b1deed 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py @@ -16,10 +16,10 @@ import os import time import paddle +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn diff --git a/paddlespeech/vector/exps/ecapa_tdnn/test.py b/paddlespeech/vector/exps/ecapa_tdnn/test.py index 6c87dbe7b..f15dbf9b7 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/test.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/test.py @@ -18,10 +18,10 @@ import numpy as np import paddle from paddle.io import BatchSampler from paddle.io import 
DataLoader +from paddleaudio.metric import compute_eer from tqdm import tqdm from yacs.config import CfgNode -from paddlespeech.audio.metric import compute_eer from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import batch_feature_normalize from paddlespeech.vector.io.dataset import CSVDataset diff --git a/paddlespeech/vector/exps/ecapa_tdnn/train.py b/paddlespeech/vector/exps/ecapa_tdnn/train.py index 961b75e29..bf014045d 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/train.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/train.py @@ -20,9 +20,9 @@ import paddle from paddle.io import BatchSampler from paddle.io import DataLoader from paddle.io import DistributedBatchSampler +from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.io.augment import waveform_augment diff --git a/paddlespeech/vector/io/dataset.py b/paddlespeech/vector/io/dataset.py index c9d56b5ea..dff8ad9fd 100644 --- a/paddlespeech/vector/io/dataset.py +++ b/paddlespeech/vector/io/dataset.py @@ -15,10 +15,9 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio - -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log logger = Log(__name__).getlog() diff --git a/paddlespeech/vector/io/dataset_from_json.py b/paddlespeech/vector/io/dataset_from_json.py index 32960e456..852f39a94 100644 --- a/paddlespeech/vector/io/dataset_from_json.py +++ b/paddlespeech/vector/io/dataset_from_json.py @@ -16,10 +16,9 @@ from dataclasses import dataclass from dataclasses 
import fields from paddle.io import Dataset - -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.librosa import melspectrogram -from paddlespeech.audio.compliance.librosa import mfcc +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram +from paddleaudio.compliance.librosa import mfcc @dataclass