diff --git a/.mergify.yml b/.mergify.yml index 68b24810..5cb1f486 100644 --- a/.mergify.yml +++ b/.mergify.yml @@ -52,7 +52,7 @@ pull_request_rules: add: ["T2S"] - name: "auto add label=Audio" conditions: - - files~=^paddleaudio/ + - files~=^paddlespeech/audio/ actions: label: add: ["Audio"] @@ -100,7 +100,7 @@ pull_request_rules: add: ["README"] - name: "auto add label=Documentation" conditions: - - files~=^(docs/|CHANGELOG.md|paddleaudio/CHANGELOG.md) + - files~=^(docs/|CHANGELOG.md) actions: label: add: ["Documentation"] diff --git a/audio/.gitignore b/audio/.gitignore deleted file mode 100644 index 1c930053..00000000 --- a/audio/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -.eggs -*.wav diff --git a/audio/CHANGELOG.md b/audio/CHANGELOG.md deleted file mode 100644 index 925d7769..00000000 --- a/audio/CHANGELOG.md +++ /dev/null @@ -1,9 +0,0 @@ -# Changelog - -Date: 2022-3-15, Author: Xiaojie Chen. - - kaldi and librosa mfcc, fbank, spectrogram. - - unit test and benchmark. - -Date: 2022-2-25, Author: Hui Zhang. - - Refactor architecture. - - dtw distance and mcd style dtw. diff --git a/audio/README.md b/audio/README.md deleted file mode 100644 index 697c0173..00000000 --- a/audio/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# PaddleAudio - -PaddleAudio is an audio library for PaddlePaddle. - -## Install - -`pip install .` diff --git a/audio/docs/Makefile b/audio/docs/Makefile deleted file mode 100644 index 69fe55ec..00000000 --- a/audio/docs/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/audio/docs/README.md b/audio/docs/README.md deleted file mode 100644 index 20626f52..00000000 --- a/audio/docs/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Build docs for PaddleAudio - -Execute the following steps in **current directory**. - -## 1. Install - -`pip install Sphinx sphinx_rtd_theme` - - -## 2. Generate API docs - -Generate API docs from doc string. - -`sphinx-apidoc -fMeT -o source ../paddleaudio ../paddleaudio/utils --templatedir source/_templates` - - -## 3. Build - -`sphinx-build source _html` - - -## 4. Preview - -Open `_html/index.html` for page preview. diff --git a/audio/docs/images/paddle.png b/audio/docs/images/paddle.png deleted file mode 100644 index bc1135ab..00000000 Binary files a/audio/docs/images/paddle.png and /dev/null differ diff --git a/audio/docs/make.bat b/audio/docs/make.bat deleted file mode 100644 index 543c6b13..00000000 --- a/audio/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. 
- echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% - -:end -popd diff --git a/audio/paddleaudio/utils/env.py b/audio/paddleaudio/utils/env.py deleted file mode 100644 index a2d14b89..00000000 --- a/audio/paddleaudio/utils/env.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -''' -This module is used to store environmental variables in PaddleAudio. -PPAUDIO_HOME --> the root directory for storing PaddleAudio related data. Default to ~/.paddleaudio. Users can change the -├ default value through the PPAUDIO_HOME environment variable. -├─ MODEL_HOME --> Store model files. -└─ DATA_HOME --> Store automatically downloaded datasets. -''' -import os - -__all__ = [ - 'USER_HOME', - 'PPAUDIO_HOME', - 'MODEL_HOME', - 'DATA_HOME', -] - - -def _get_user_home(): - return os.path.expanduser('~') - - -def _get_ppaudio_home(): - if 'PPAUDIO_HOME' in os.environ: - home_path = os.environ['PPAUDIO_HOME'] - if os.path.exists(home_path): - if os.path.isdir(home_path): - return home_path - else: - raise RuntimeError( - 'The environment variable PPAUDIO_HOME {} is not a directory.'. 
- format(home_path)) - else: - return home_path - return os.path.join(_get_user_home(), '.paddleaudio') - - -def _get_sub_home(directory): - home = os.path.join(_get_ppaudio_home(), directory) - if not os.path.exists(home): - os.makedirs(home) - return home - - -USER_HOME = _get_user_home() -PPAUDIO_HOME = _get_ppaudio_home() -MODEL_HOME = _get_sub_home('models') -DATA_HOME = _get_sub_home('datasets') diff --git a/audio/setup.py b/audio/setup.py deleted file mode 100644 index 80fe07b7..00000000 --- a/audio/setup.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import glob -import os - -import setuptools -from setuptools.command.install import install -from setuptools.command.test import test - -# set the version here -VERSION = '0.0.0' - - -# Inspired by the example at https://pytest.org/latest/goodpractises.html -class TestCommand(test): - def finalize_options(self): - test.finalize_options(self) - self.test_args = [] - self.test_suite = True - - def run(self): - self.run_benchmark() - super(TestCommand, self).run() - - def run_tests(self): - # Run nose ensuring that argv simulates running nosetests directly - import nose - nose.run_exit(argv=['nosetests', '-w', 'tests']) - - def run_benchmark(self): - for benchmark_item in glob.glob('tests/benchmark/*py'): - os.system(f'pytest {benchmark_item}') - - -class InstallCommand(install): - def run(self): - install.run(self) - - -def write_version_py(filename='paddleaudio/__init__.py'): - with open(filename, "a") as f: - f.write(f"__version__ = '{VERSION}'") - - -def remove_version_py(filename='paddleaudio/__init__.py'): - with open(filename, "r") as f: - lines = f.readlines() - with open(filename, "w") as f: - for line in lines: - if "__version__" not in line: - f.write(line) - - -remove_version_py() -write_version_py() - -setuptools.setup( - name="paddleaudio", - version=VERSION, - author="", - author_email="", - description="PaddleAudio, in development", - long_description="", - long_description_content_type="text/markdown", - url="", - packages=setuptools.find_packages(include=['paddleaudio*']), - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], - python_requires='>=3.6', - install_requires=[ - 'numpy >= 1.15.0', 'scipy >= 1.0.0', 'resampy >= 0.2.2', - 'soundfile >= 0.9.0', 'colorlog', 'pathos == 0.2.8' - ], - extras_require={ - 'test': [ - 'nose', 'librosa==0.8.1', 'soundfile==0.10.3.post1', - 'torchaudio==0.10.2', 'pytest-benchmark' - ], - }, - cmdclass={ - 'install': 
InstallCommand, - 'test': TestCommand, - }, ) - -remove_version_py() diff --git a/audio/tests/.gitkeep b/audio/tests/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/demos/audio_searching/README.md b/demos/audio_searching/README.md index e829d991..db38d14e 100644 --- a/demos/audio_searching/README.md +++ b/demos/audio_searching/README.md @@ -89,7 +89,7 @@ Then to start the system server, and it provides HTTP backend services. Then start the server with Fastapi. ```bash - export PYTHONPATH=$PYTHONPATH:./src:../../paddleaudio + export PYTHONPATH=$PYTHONPATH:./src python src/audio_search.py ``` diff --git a/demos/audio_searching/README_cn.md b/demos/audio_searching/README_cn.md index c13742af..6d38b91f 100644 --- a/demos/audio_searching/README_cn.md +++ b/demos/audio_searching/README_cn.md @@ -91,7 +91,7 @@ ffce340b3790 minio/minio:RELEASE.2020-12-03T00-03-10Z "/usr/bin/docker-ent…" 启动用 Fastapi 构建的服务 ```bash - export PYTHONPATH=$PYTHONPATH:./src:../../paddleaudio + export PYTHONPATH=$PYTHONPATH:./src python src/audio_search.py ``` diff --git a/audio/docs/source/_static/custom.css b/docs/source/audio/_static/custom.css similarity index 100% rename from audio/docs/source/_static/custom.css rename to docs/source/audio/_static/custom.css diff --git a/audio/docs/source/_templates/module.rst_t b/docs/source/audio/_templates/module.rst_t similarity index 100% rename from audio/docs/source/_templates/module.rst_t rename to docs/source/audio/_templates/module.rst_t diff --git a/audio/docs/source/_templates/package.rst_t b/docs/source/audio/_templates/package.rst_t similarity index 100% rename from audio/docs/source/_templates/package.rst_t rename to docs/source/audio/_templates/package.rst_t diff --git a/audio/docs/source/_templates/toc.rst_t b/docs/source/audio/_templates/toc.rst_t similarity index 100% rename from audio/docs/source/_templates/toc.rst_t rename to docs/source/audio/_templates/toc.rst_t diff --git a/audio/docs/source/conf.py 
b/docs/source/audio/conf.py similarity index 100% rename from audio/docs/source/conf.py rename to docs/source/audio/conf.py diff --git a/audio/docs/source/index.rst b/docs/source/audio/index.rst similarity index 100% rename from audio/docs/source/index.rst rename to docs/source/audio/index.rst diff --git a/docs/source/cls/custom_dataset.md b/docs/source/cls/custom_dataset.md index aaf5943c..e39dcf12 100644 --- a/docs/source/cls/custom_dataset.md +++ b/docs/source/cls/custom_dataset.md @@ -1,8 +1,8 @@ # Customize Dataset for Audio Classification -Following this tutorial you can customize your dataset for audio classification task by using `paddlespeech` and `paddleaudio`. +Following this tutorial you can customize your dataset for audio classification task by using `paddlespeech`. -A base class of classification dataset is `paddleaudio.dataset.AudioClassificationDataset`. To customize your dataset you should write a dataset class derived from `AudioClassificationDataset`. +A base class of classification dataset is `paddlespeech.audio.datasets.dataset.AudioClassificationDataset`. To customize your dataset you should write a dataset class derived from `AudioClassificationDataset`. Assuming you have some wave files that stored in your own directory. You should prepare a meta file with the information of filepaths and labels. For example the absolute path of it is `/PATH/TO/META_FILE.txt`: ``` @@ -14,7 +14,7 @@ Assuming you have some wave files that stored in your own directory. 
You should Here is an example to build your custom dataset in `custom_dataset.py`: ```python -from paddleaudio.datasets.dataset import AudioClassificationDataset +from paddlespeech.audio.datasets.dataset import AudioClassificationDataset class CustomDataset(AudioClassificationDataset): meta_file = '/PATH/TO/META_FILE.txt' @@ -48,7 +48,7 @@ class CustomDataset(AudioClassificationDataset): Then you can build dataset and data loader from `CustomDataset`: ```python import paddle -from paddleaudio.features import LogMelSpectrogram +from paddlespeech.audio.features import LogMelSpectrogram from custom_dataset import CustomDataset diff --git a/examples/esc50/cls0/conf/panns.yaml b/examples/esc50/cls0/conf/panns.yaml index 3a9d42aa..1f0323f0 100644 --- a/examples/esc50/cls0/conf/panns.yaml +++ b/examples/esc50/cls0/conf/panns.yaml @@ -1,5 +1,5 @@ data: - dataset: 'paddleaudio.datasets:ESC50' + dataset: 'paddlespeech.audio.datasets:ESC50' num_classes: 50 train: mode: 'train' diff --git a/examples/hey_snips/kws0/conf/mdtc.yaml b/examples/hey_snips/kws0/conf/mdtc.yaml index 4bd0708c..76e47bc7 100644 --- a/examples/hey_snips/kws0/conf/mdtc.yaml +++ b/examples/hey_snips/kws0/conf/mdtc.yaml @@ -2,7 +2,7 @@ ########################################### # Data # ########################################### -dataset: 'paddleaudio.datasets:HeySnips' +dataset: 'paddlespeech.audio.datasets:HeySnips' data_dir: '/PATH/TO/DATA/hey_snips_research_6k_en_train_eval_clean_ter' ############################################ diff --git a/examples/voxceleb/sv0/local/data_prepare.py b/examples/voxceleb/sv0/local/data_prepare.py index b4486b6f..e5a5dff7 100644 --- a/examples/voxceleb/sv0/local/data_prepare.py +++ b/examples/voxceleb/sv0/local/data_prepare.py @@ -14,9 +14,9 @@ import argparse import paddle -from paddleaudio.datasets.voxceleb import VoxCeleb from yacs.config import CfgNode +from paddlespeech.audio.datasets.voxceleb import VoxCeleb from paddlespeech.s2t.utils.log import Log from 
paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.training.seeding import seed_everything diff --git a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py index 0d0163f1..7ad9bd6e 100644 --- a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py @@ -21,9 +21,9 @@ import os from typing import List import tqdm -from paddleaudio import load as load_audio from yacs.config import CfgNode +from paddlespeech.audio import load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py index ffd0d212..40adf53d 100644 --- a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py @@ -22,9 +22,9 @@ import os import random import tqdm -from paddleaudio import load as load_audio from yacs.config import CfgNode +from paddlespeech.audio import load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/audio/paddleaudio/__init__.py b/paddlespeech/audio/__init__.py similarity index 100% rename from audio/paddleaudio/__init__.py rename to paddlespeech/audio/__init__.py diff --git a/audio/paddleaudio/backends/__init__.py b/paddlespeech/audio/backends/__init__.py similarity index 100% rename from audio/paddleaudio/backends/__init__.py rename to paddlespeech/audio/backends/__init__.py diff --git a/audio/paddleaudio/backends/soundfile_backend.py b/paddlespeech/audio/backends/soundfile_backend.py similarity index 100% rename from audio/paddleaudio/backends/soundfile_backend.py rename to 
paddlespeech/audio/backends/soundfile_backend.py diff --git a/audio/paddleaudio/backends/sox_backend.py b/paddlespeech/audio/backends/sox_backend.py similarity index 100% rename from audio/paddleaudio/backends/sox_backend.py rename to paddlespeech/audio/backends/sox_backend.py diff --git a/audio/paddleaudio/compliance/__init__.py b/paddlespeech/audio/compliance/__init__.py similarity index 100% rename from audio/paddleaudio/compliance/__init__.py rename to paddlespeech/audio/compliance/__init__.py diff --git a/audio/paddleaudio/compliance/kaldi.py b/paddlespeech/audio/compliance/kaldi.py similarity index 100% rename from audio/paddleaudio/compliance/kaldi.py rename to paddlespeech/audio/compliance/kaldi.py diff --git a/audio/paddleaudio/compliance/librosa.py b/paddlespeech/audio/compliance/librosa.py similarity index 100% rename from audio/paddleaudio/compliance/librosa.py rename to paddlespeech/audio/compliance/librosa.py diff --git a/audio/paddleaudio/datasets/__init__.py b/paddlespeech/audio/datasets/__init__.py similarity index 100% rename from audio/paddleaudio/datasets/__init__.py rename to paddlespeech/audio/datasets/__init__.py diff --git a/audio/paddleaudio/datasets/dataset.py b/paddlespeech/audio/datasets/dataset.py similarity index 100% rename from audio/paddleaudio/datasets/dataset.py rename to paddlespeech/audio/datasets/dataset.py diff --git a/audio/paddleaudio/datasets/esc50.py b/paddlespeech/audio/datasets/esc50.py similarity index 99% rename from audio/paddleaudio/datasets/esc50.py rename to paddlespeech/audio/datasets/esc50.py index e7477d40..f5c7050f 100644 --- a/audio/paddleaudio/datasets/esc50.py +++ b/paddlespeech/audio/datasets/esc50.py @@ -16,8 +16,8 @@ import os from typing import List from typing import Tuple +from ..utils import DATA_HOME from ..utils.download import download_and_decompress -from ..utils.env import DATA_HOME from .dataset import AudioClassificationDataset __all__ = ['ESC50'] diff --git 
a/audio/paddleaudio/datasets/gtzan.py b/paddlespeech/audio/datasets/gtzan.py similarity index 99% rename from audio/paddleaudio/datasets/gtzan.py rename to paddlespeech/audio/datasets/gtzan.py index cfea6f37..1f6835a5 100644 --- a/audio/paddleaudio/datasets/gtzan.py +++ b/paddlespeech/audio/datasets/gtzan.py @@ -17,8 +17,8 @@ import random from typing import List from typing import Tuple +from ..utils import DATA_HOME from ..utils.download import download_and_decompress -from ..utils.env import DATA_HOME from .dataset import AudioClassificationDataset __all__ = ['GTZAN'] diff --git a/audio/paddleaudio/datasets/hey_snips.py b/paddlespeech/audio/datasets/hey_snips.py similarity index 100% rename from audio/paddleaudio/datasets/hey_snips.py rename to paddlespeech/audio/datasets/hey_snips.py diff --git a/audio/paddleaudio/datasets/rirs_noises.py b/paddlespeech/audio/datasets/rirs_noises.py similarity index 100% rename from audio/paddleaudio/datasets/rirs_noises.py rename to paddlespeech/audio/datasets/rirs_noises.py diff --git a/audio/paddleaudio/datasets/tess.py b/paddlespeech/audio/datasets/tess.py similarity index 99% rename from audio/paddleaudio/datasets/tess.py rename to paddlespeech/audio/datasets/tess.py index 8faab9c3..1469fa5e 100644 --- a/audio/paddleaudio/datasets/tess.py +++ b/paddlespeech/audio/datasets/tess.py @@ -17,8 +17,8 @@ import random from typing import List from typing import Tuple +from ..utils import DATA_HOME from ..utils.download import download_and_decompress -from ..utils.env import DATA_HOME from .dataset import AudioClassificationDataset __all__ = ['TESS'] diff --git a/audio/paddleaudio/datasets/urban_sound.py b/paddlespeech/audio/datasets/urban_sound.py similarity index 99% rename from audio/paddleaudio/datasets/urban_sound.py rename to paddlespeech/audio/datasets/urban_sound.py index d97c4d1d..0389cd5f 100644 --- a/audio/paddleaudio/datasets/urban_sound.py +++ b/paddlespeech/audio/datasets/urban_sound.py @@ -16,8 +16,8 @@ import os from 
typing import List from typing import Tuple +from ..utils import DATA_HOME from ..utils.download import download_and_decompress -from ..utils.env import DATA_HOME from .dataset import AudioClassificationDataset __all__ = ['UrbanSound8K'] diff --git a/audio/paddleaudio/datasets/voxceleb.py b/paddlespeech/audio/datasets/voxceleb.py similarity index 100% rename from audio/paddleaudio/datasets/voxceleb.py rename to paddlespeech/audio/datasets/voxceleb.py diff --git a/audio/paddleaudio/features/__init__.py b/paddlespeech/audio/features/__init__.py similarity index 100% rename from audio/paddleaudio/features/__init__.py rename to paddlespeech/audio/features/__init__.py diff --git a/audio/paddleaudio/features/layers.py b/paddlespeech/audio/features/layers.py similarity index 100% rename from audio/paddleaudio/features/layers.py rename to paddlespeech/audio/features/layers.py diff --git a/audio/paddleaudio/functional/__init__.py b/paddlespeech/audio/functional/__init__.py similarity index 100% rename from audio/paddleaudio/functional/__init__.py rename to paddlespeech/audio/functional/__init__.py diff --git a/audio/paddleaudio/functional/functional.py b/paddlespeech/audio/functional/functional.py similarity index 100% rename from audio/paddleaudio/functional/functional.py rename to paddlespeech/audio/functional/functional.py diff --git a/audio/paddleaudio/functional/window.py b/paddlespeech/audio/functional/window.py similarity index 100% rename from audio/paddleaudio/functional/window.py rename to paddlespeech/audio/functional/window.py diff --git a/audio/paddleaudio/io/__init__.py b/paddlespeech/audio/io/__init__.py similarity index 100% rename from audio/paddleaudio/io/__init__.py rename to paddlespeech/audio/io/__init__.py diff --git a/audio/paddleaudio/metric/__init__.py b/paddlespeech/audio/metric/__init__.py similarity index 100% rename from audio/paddleaudio/metric/__init__.py rename to paddlespeech/audio/metric/__init__.py diff --git 
a/audio/paddleaudio/metric/eer.py b/paddlespeech/audio/metric/eer.py similarity index 100% rename from audio/paddleaudio/metric/eer.py rename to paddlespeech/audio/metric/eer.py diff --git a/audio/paddleaudio/sox_effects/__init__.py b/paddlespeech/audio/sox_effects/__init__.py similarity index 100% rename from audio/paddleaudio/sox_effects/__init__.py rename to paddlespeech/audio/sox_effects/__init__.py diff --git a/audio/paddleaudio/utils/__init__.py b/paddlespeech/audio/utils/__init__.py similarity index 88% rename from audio/paddleaudio/utils/__init__.py rename to paddlespeech/audio/utils/__init__.py index afb9cedd..742f9f8e 100644 --- a/audio/paddleaudio/utils/__init__.py +++ b/paddlespeech/audio/utils/__init__.py @@ -11,13 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from ...cli.utils import DATA_HOME +from ...cli.utils import MODEL_HOME from .download import decompress from .download import download_and_decompress from .download import load_state_dict_from_url -from .env import DATA_HOME -from .env import MODEL_HOME -from .env import PPAUDIO_HOME -from .env import USER_HOME from .error import ParameterError from .log import Logger from .log import logger diff --git a/audio/paddleaudio/utils/download.py b/paddlespeech/audio/utils/download.py similarity index 100% rename from audio/paddleaudio/utils/download.py rename to paddlespeech/audio/utils/download.py diff --git a/audio/paddleaudio/utils/error.py b/paddlespeech/audio/utils/error.py similarity index 100% rename from audio/paddleaudio/utils/error.py rename to paddlespeech/audio/utils/error.py diff --git a/audio/paddleaudio/utils/log.py b/paddlespeech/audio/utils/log.py similarity index 100% rename from audio/paddleaudio/utils/log.py rename to paddlespeech/audio/utils/log.py diff --git a/audio/paddleaudio/utils/numeric.py b/paddlespeech/audio/utils/numeric.py 
similarity index 100% rename from audio/paddleaudio/utils/numeric.py rename to paddlespeech/audio/utils/numeric.py diff --git a/audio/paddleaudio/utils/time.py b/paddlespeech/audio/utils/time.py similarity index 100% rename from audio/paddleaudio/utils/time.py rename to paddlespeech/audio/utils/time.py diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index d31379b8..942dc3b9 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -21,12 +21,12 @@ from typing import Union import numpy as np import paddle import yaml -from paddleaudio import load -from paddleaudio.features import LogMelSpectrogram from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper +from paddlespeech.audio import load +from paddlespeech.audio.features import LogMelSpectrogram __all__ = ['CLSExecutor'] diff --git a/paddlespeech/cli/utils.py b/paddlespeech/cli/utils.py index 128767e6..21c887e9 100644 --- a/paddlespeech/cli/utils.py +++ b/paddlespeech/cli/utils.py @@ -24,11 +24,11 @@ from typing import Any from typing import Dict import paddle -import paddleaudio import requests import yaml from paddle.framework import load +import paddlespeech.audio from . 
import download from .entry import commands try: @@ -190,6 +190,7 @@ def _get_sub_home(directory): PPSPEECH_HOME = _get_paddlespcceh_home() MODEL_HOME = _get_sub_home('models') CONF_HOME = _get_sub_home('conf') +DATA_HOME = _get_sub_home('datasets') def _md5(text: str): @@ -281,7 +282,7 @@ def _note_one_stat(cls_name, params={}): if 'audio_file' in params: try: - _, sr = paddleaudio.load(params['audio_file']) + _, sr = paddlespeech.audio.load(params['audio_file']) except Exception: sr = -1 diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index d049ba7d..4bc8e135 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -22,13 +22,13 @@ from typing import Union import paddle import soundfile -from paddleaudio.backends import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.modules.sid_model import SpeakerIdetification diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py index ee566ed4..fe1c93fa 100644 --- a/paddlespeech/cls/exps/panns/deploy/predict.py +++ b/paddlespeech/cls/exps/panns/deploy/predict.py @@ -16,11 +16,12 @@ import os import numpy as np from paddle import inference -from paddleaudio.backends import load as load_audio -from paddleaudio.datasets import ESC50 -from paddleaudio.features import melspectrogram from scipy.special import softmax +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.datasets import ESC50 +from paddlespeech.audio.features import melspectrogram + # yapf: disable parser = argparse.ArgumentParser() 
parser.add_argument("--model_dir", type=str, required=True, default="./export", help="The directory to static model.") diff --git a/paddlespeech/cls/exps/panns/export_model.py b/paddlespeech/cls/exps/panns/export_model.py index 63b22981..e62d58f0 100644 --- a/paddlespeech/cls/exps/panns/export_model.py +++ b/paddlespeech/cls/exps/panns/export_model.py @@ -15,8 +15,8 @@ import argparse import os import paddle -from paddleaudio.datasets import ESC50 +from paddlespeech.audio.datasets import ESC50 from paddlespeech.cls.models import cnn14 from paddlespeech.cls.models import SoundClassifier diff --git a/paddlespeech/cls/exps/panns/predict.py b/paddlespeech/cls/exps/panns/predict.py index d0b96354..97759a89 100644 --- a/paddlespeech/cls/exps/panns/predict.py +++ b/paddlespeech/cls/exps/panns/predict.py @@ -17,10 +17,10 @@ import os import paddle import paddle.nn.functional as F import yaml -from paddleaudio.backends import load as load_audio -from paddleaudio.features import LogMelSpectrogram -from paddleaudio.utils import logger +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.features import LogMelSpectrogram +from paddlespeech.audio.utils import logger from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/paddlespeech/cls/exps/panns/train.py b/paddlespeech/cls/exps/panns/train.py index 8e06273d..fba38a01 100644 --- a/paddlespeech/cls/exps/panns/train.py +++ b/paddlespeech/cls/exps/panns/train.py @@ -16,10 +16,10 @@ import os import paddle import yaml -from paddleaudio.features import LogMelSpectrogram -from paddleaudio.utils import logger -from paddleaudio.utils import Timer +from paddlespeech.audio.features import LogMelSpectrogram +from paddlespeech.audio.utils import logger +from paddlespeech.audio.utils import Timer from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git 
a/paddlespeech/cls/models/panns/panns.py b/paddlespeech/cls/models/panns/panns.py index b442b2fd..4befe7aa 100644 --- a/paddlespeech/cls/models/panns/panns.py +++ b/paddlespeech/cls/models/panns/panns.py @@ -15,8 +15,9 @@ import os import paddle.nn as nn import paddle.nn.functional as F -from paddleaudio.utils.download import load_state_dict_from_url -from paddleaudio.utils.env import MODEL_HOME + +from paddlespeech.audio.utils import MODEL_HOME +from paddlespeech.audio.utils.download import load_state_dict_from_url __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6'] diff --git a/paddlespeech/kws/exps/mdtc/train.py b/paddlespeech/kws/exps/mdtc/train.py index 5a9ca92d..94e45d59 100644 --- a/paddlespeech/kws/exps/mdtc/train.py +++ b/paddlespeech/kws/exps/mdtc/train.py @@ -14,10 +14,10 @@ import os import paddle -from paddleaudio.utils import logger -from paddleaudio.utils import Timer from yacs.config import CfgNode +from paddlespeech.audio.utils import logger +from paddlespeech.audio.utils import Timer from paddlespeech.kws.exps.mdtc.collate import collate_features from paddlespeech.kws.models.loss import max_pooling_loss from paddlespeech.kws.models.mdtc import KWSModel diff --git a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py index 22329d5e..ac5720fd 100644 --- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py +++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py @@ -14,10 +14,11 @@ """Contains the audio featurizer class.""" import numpy as np import paddle -import paddleaudio.compliance.kaldi as kaldi from python_speech_features import delta from python_speech_features import mfcc +import paddlespeech.audio.compliance.kaldi as kaldi + class AudioFeaturizer(): """Audio featurizer, for extracting features from audio contents of diff --git a/paddlespeech/s2t/transform/spectrogram.py b/paddlespeech/s2t/transform/spectrogram.py index 2a93bedc..19f0237b 100644 --- 
a/paddlespeech/s2t/transform/spectrogram.py +++ b/paddlespeech/s2t/transform/spectrogram.py @@ -15,9 +15,10 @@ import librosa import numpy as np import paddle -import paddleaudio.compliance.kaldi as kaldi from python_speech_features import logfbank +import paddlespeech.audio.compliance.kaldi as kaldi + def stft(x, n_fft, diff --git a/paddlespeech/server/engine/vector/python/vector_engine.py b/paddlespeech/server/engine/vector/python/vector_engine.py index 85430370..3c72f55d 100644 --- a/paddlespeech/server/engine/vector/python/vector_engine.py +++ b/paddlespeech/server/engine/vector/python/vector_engine.py @@ -16,9 +16,9 @@ from collections import OrderedDict import numpy as np import paddle -from paddleaudio.backends import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.cli.log import logger from paddlespeech.cli.vector.infer import VectorExecutor from paddlespeech.server.engine.base_engine import BaseEngine diff --git a/paddlespeech/server/util.py b/paddlespeech/server/util.py index 13f2ddf6..32546a33 100644 --- a/paddlespeech/server/util.py +++ b/paddlespeech/server/util.py @@ -24,11 +24,11 @@ from typing import Any from typing import Dict import paddle -import paddleaudio import requests import yaml from paddle.framework import load +import paddlespeech.audio from .entry import client_commands from .entry import server_commands from paddlespeech.cli import download @@ -289,7 +289,7 @@ def _note_one_stat(cls_name, params={}): if 'audio_file' in params: try: - _, sr = paddleaudio.load(params['audio_file']) + _, sr = paddlespeech.audio.load(params['audio_file']) except Exception: sr = -1 diff --git a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py index e8d91bf3..cd4538bb 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py 
+++ b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py @@ -16,10 +16,10 @@ import os import time import paddle -from paddleaudio.backends import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn diff --git a/paddlespeech/vector/exps/ecapa_tdnn/test.py b/paddlespeech/vector/exps/ecapa_tdnn/test.py index f15dbf9b..6c87dbe7 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/test.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/test.py @@ -18,10 +18,10 @@ import numpy as np import paddle from paddle.io import BatchSampler from paddle.io import DataLoader -from paddleaudio.metric import compute_eer from tqdm import tqdm from yacs.config import CfgNode +from paddlespeech.audio.metric import compute_eer from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import batch_feature_normalize from paddlespeech.vector.io.dataset import CSVDataset diff --git a/paddlespeech/vector/exps/ecapa_tdnn/train.py b/paddlespeech/vector/exps/ecapa_tdnn/train.py index bf014045..961b75e2 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/train.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/train.py @@ -20,9 +20,9 @@ import paddle from paddle.io import BatchSampler from paddle.io import DataLoader from paddle.io import DistributedBatchSampler -from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.io.augment import waveform_augment diff --git a/paddlespeech/vector/io/dataset.py 
b/paddlespeech/vector/io/dataset.py index 1b514f3d..245b2959 100644 --- a/paddlespeech/vector/io/dataset.py +++ b/paddlespeech/vector/io/dataset.py @@ -15,9 +15,9 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset -from paddleaudio import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram +from paddlespeech.audio import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log logger = Log(__name__).getlog() diff --git a/paddlespeech/vector/io/dataset_from_json.py b/paddlespeech/vector/io/dataset_from_json.py index bf04e113..12e84577 100644 --- a/paddlespeech/vector/io/dataset_from_json.py +++ b/paddlespeech/vector/io/dataset_from_json.py @@ -16,9 +16,10 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset -from paddleaudio import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram -from paddleaudio.compliance.librosa import mfcc + +from paddlespeech.audio import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram +from paddlespeech.audio.compliance.librosa import mfcc @dataclass diff --git a/setup.py b/setup.py index 657de6c5..679549b4 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ from setuptools import find_packages from setuptools import setup from setuptools.command.develop import develop from setuptools.command.install import install +from setuptools.command.test import test HERE = Path(os.path.abspath(os.path.dirname(__file__))) @@ -31,42 +32,13 @@ VERSION = '0.0.0' COMMITID = 'none' base = [ - "editdistance", - "g2p_en", - "g2pM", - "h5py", - "inflect", - "jieba", - "jsonlines", - "kaldiio", - "librosa==0.8.1", - "loguru", - "matplotlib", - "nara_wpe", - "onnxruntime", - "pandas", - "paddleaudio", - "paddlenlp", - "paddlespeech_feat", - "praatio==5.0.0", - "pypinyin", - "pypinyin-dict", - 
"python-dateutil", - "pyworld", - "resampy==0.2.2", - "sacrebleu", - "scipy", - "sentencepiece~=0.1.96", - "soundfile~=0.10", - "textgrid", - "timer", - "tqdm", - "typeguard", - "visualdl", - "webrtcvad", - "yacs~=0.1.8", - "prettytable", - "zhon", + "editdistance", "g2p_en", "g2pM", "h5py", "inflect", "jieba", "jsonlines", + "kaldiio", "librosa==0.8.1", "loguru", "matplotlib", "nara_wpe", + "onnxruntime", "pandas", "paddlenlp", "paddlespeech_feat", "praatio==5.0.0", + "pypinyin", "pypinyin-dict", "python-dateutil", "pyworld", "resampy==0.2.2", + "sacrebleu", "scipy", "sentencepiece~=0.1.96", "soundfile~=0.10", + "textgrid", "timer", "tqdm", "typeguard", "visualdl", "webrtcvad", + "yacs~=0.1.8", "prettytable", "zhon", 'colorlog', 'pathos == 0.2.8' ] server = [ @@ -177,7 +149,19 @@ class InstallCommand(install): install.run(self) - # cmd: python setup.py upload +class TestCommand(test): + def finalize_options(self): + test.finalize_options(self) + self.test_args = [] + self.test_suite = True + + def run_tests(self): + # Run nose ensuring that argv simulates running nosetests directly + import nose + nose.run_exit(argv=['nosetests', '-w', 'tests']) + + +# cmd: python setup.py upload class UploadCommand(Command): description = "Build and publish the package." 
user_options = [] @@ -279,11 +263,13 @@ setup_info = dict( "sphinx", "sphinx-rtd-theme", "numpydoc", "myst_parser", "recommonmark>=0.5.0", "sphinx-markdown-tables", "sphinx-autobuild" ], + 'test': ['nose', 'torchaudio==0.10.2'], }, cmdclass={ 'develop': DevelopCommand, 'install': InstallCommand, 'upload': UploadCommand, + 'test': TestCommand, }, # Package info diff --git a/audio/tests/benchmark/README.md b/tests/benchmark/audio/README.md similarity index 97% rename from audio/tests/benchmark/README.md rename to tests/benchmark/audio/README.md index b9034100..9cade74e 100644 --- a/audio/tests/benchmark/README.md +++ b/tests/benchmark/audio/README.md @@ -15,7 +15,6 @@ Result: ========================================================================== test session starts ========================================================================== platform linux -- Python 3.7.7, pytest-7.0.1, pluggy-1.0.0 benchmark: 3.4.1 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000) -rootdir: /ssd3/chenxiaojie06/PaddleSpeech/DeepSpeech/paddleaudio plugins: typeguard-2.12.1, benchmark-3.4.1, anyio-3.5.0 collected 4 items diff --git a/audio/tests/benchmark/log_melspectrogram.py b/tests/benchmark/audio/log_melspectrogram.py similarity index 87% rename from audio/tests/benchmark/log_melspectrogram.py rename to tests/benchmark/audio/log_melspectrogram.py index 9832aed4..c85fcecf 100644 --- a/audio/tests/benchmark/log_melspectrogram.py +++ b/tests/benchmark/audio/log_melspectrogram.py @@ -17,15 +17,17 @@ import urllib.request import librosa import numpy as np import paddle -import paddleaudio import torch import torchaudio +import paddlespeech.audio + wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' if not os.path.isfile(os.path.basename(wav_url)): urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) -waveform, sr = 
paddleaudio.load(os.path.abspath(os.path.basename(wav_url))) +waveform, sr = paddlespeech.audio.load( + os.path.abspath(os.path.basename(wav_url))) waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) @@ -55,7 +57,7 @@ def enable_gpu_device(): paddle.set_device('gpu') -log_mel_extractor = paddleaudio.features.LogMelSpectrogram( +log_mel_extractor = paddlespeech.audio.features.LogMelSpectrogram( **mel_conf, f_min=0.0, top_db=80.0, dtype=waveform_tensor.dtype) @@ -65,20 +67,20 @@ def log_melspectrogram(): def test_log_melspect_cpu(benchmark): enable_cpu_device() - feature_paddleaudio = benchmark(log_melspectrogram) + feature_audio = benchmark(log_melspectrogram) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_log_melspect_gpu(benchmark): enable_gpu_device() - feature_paddleaudio = benchmark(log_melspectrogram) + feature_audio = benchmark(log_melspectrogram) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=2) + feature_librosa, feature_audio, decimal=2) mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram( @@ -102,11 +104,11 @@ def test_log_melspect_cpu_torchaudio(benchmark): waveform_tensor_torch = waveform_tensor_torch.to('cpu') amplitude_to_DB = amplitude_to_DB.to('cpu') - feature_paddleaudio = benchmark(log_melspectrogram_torchaudio) + feature_audio = benchmark(log_melspectrogram_torchaudio) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0) np.testing.assert_array_almost_equal( - feature_librosa, 
feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_log_melspect_gpu_torchaudio(benchmark): diff --git a/audio/tests/benchmark/melspectrogram.py b/tests/benchmark/audio/melspectrogram.py similarity index 85% rename from audio/tests/benchmark/melspectrogram.py rename to tests/benchmark/audio/melspectrogram.py index 5fe3f248..49815894 100644 --- a/audio/tests/benchmark/melspectrogram.py +++ b/tests/benchmark/audio/melspectrogram.py @@ -17,15 +17,17 @@ import urllib.request import librosa import numpy as np import paddle -import paddleaudio import torch import torchaudio +import paddlespeech.audio + wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' if not os.path.isfile(os.path.basename(wav_url)): urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) -waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url))) +waveform, sr = paddlespeech.audio.load( + os.path.abspath(os.path.basename(wav_url))) waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) @@ -55,7 +57,7 @@ def enable_gpu_device(): paddle.set_device('gpu') -mel_extractor = paddleaudio.features.MelSpectrogram( +mel_extractor = paddlespeech.audio.features.MelSpectrogram( **mel_conf, f_min=0.0, dtype=waveform_tensor.dtype) @@ -65,18 +67,18 @@ def melspectrogram(): def test_melspect_cpu(benchmark): enable_cpu_device() - feature_paddleaudio = benchmark(melspectrogram) + feature_audio = benchmark(melspectrogram) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_melspect_gpu(benchmark): enable_gpu_device() - feature_paddleaudio = benchmark(melspectrogram) + feature_audio = benchmark(melspectrogram) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) np.testing.assert_array_almost_equal( - 
feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram( @@ -91,10 +93,10 @@ def test_melspect_cpu_torchaudio(benchmark): global waveform_tensor_torch, mel_extractor_torchaudio mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu') waveform_tensor_torch = waveform_tensor_torch.to('cpu') - feature_paddleaudio = benchmark(melspectrogram_torchaudio) + feature_audio = benchmark(melspectrogram_torchaudio) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_melspect_gpu_torchaudio(benchmark): diff --git a/audio/tests/benchmark/mfcc.py b/tests/benchmark/audio/mfcc.py similarity index 87% rename from audio/tests/benchmark/mfcc.py rename to tests/benchmark/audio/mfcc.py index c6a8c85f..4e286de9 100644 --- a/audio/tests/benchmark/mfcc.py +++ b/tests/benchmark/audio/mfcc.py @@ -17,15 +17,17 @@ import urllib.request import librosa import numpy as np import paddle -import paddleaudio import torch import torchaudio +import paddlespeech.audio + wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' if not os.path.isfile(os.path.basename(wav_url)): urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) -waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url))) +waveform, sr = paddlespeech.audio.load( + os.path.abspath(os.path.basename(wav_url))) waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) @@ -64,7 +66,7 @@ def enable_gpu_device(): paddle.set_device('gpu') -mfcc_extractor = paddleaudio.features.MFCC( +mfcc_extractor = paddlespeech.audio.features.MFCC( **mfcc_conf, f_min=0.0, dtype=waveform_tensor.dtype) @@ -74,18 +76,18 @@ def mfcc(): def test_mfcc_cpu(benchmark): enable_cpu_device() - 
feature_paddleaudio = benchmark(mfcc) + feature_audio = benchmark(mfcc) feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_mfcc_gpu(benchmark): enable_gpu_device() - feature_paddleaudio = benchmark(mfcc) + feature_audio = benchmark(mfcc) feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) del mel_conf_torchaudio['sample_rate'] @@ -103,10 +105,10 @@ def test_mfcc_cpu_torchaudio(benchmark): mel_extractor_torchaudio = mfcc_extractor_torchaudio.to('cpu') waveform_tensor_torch = waveform_tensor_torch.to('cpu') - feature_paddleaudio = benchmark(mfcc_torchaudio) + feature_audio = benchmark(mfcc_torchaudio) feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_mfcc_gpu_torchaudio(benchmark): diff --git a/audio/tests/backends/__init__.py b/tests/unit/audio/backends/__init__.py similarity index 100% rename from audio/tests/backends/__init__.py rename to tests/unit/audio/backends/__init__.py diff --git a/audio/tests/backends/base.py b/tests/unit/audio/backends/base.py similarity index 100% rename from audio/tests/backends/base.py rename to tests/unit/audio/backends/base.py diff --git a/audio/tests/backends/soundfile/__init__.py b/tests/unit/audio/backends/soundfile/__init__.py similarity index 100% rename from audio/tests/backends/soundfile/__init__.py rename to tests/unit/audio/backends/soundfile/__init__.py diff --git a/audio/tests/backends/soundfile/test_io.py b/tests/unit/audio/backends/soundfile/test_io.py similarity index 90% rename from audio/tests/backends/soundfile/test_io.py rename to 
tests/unit/audio/backends/soundfile/test_io.py index 9d092902..26276751 100644 --- a/audio/tests/backends/soundfile/test_io.py +++ b/tests/unit/audio/backends/soundfile/test_io.py @@ -16,16 +16,16 @@ import os import unittest import numpy as np -import paddleaudio import soundfile as sf +import paddlespeech.audio from ..base import BackendTest class TestIO(BackendTest): def test_load_mono_channel(self): sf_data, sf_sr = sf.read(self.files[0]) - pa_data, pa_sr = paddleaudio.load( + pa_data, pa_sr = paddlespeech.audio.load( self.files[0], normal=False, dtype='float64') self.assertEqual(sf_data.dtype, pa_data.dtype) @@ -35,7 +35,7 @@ class TestIO(BackendTest): def test_load_multi_channels(self): sf_data, sf_sr = sf.read(self.files[1]) sf_data = sf_data.T # Channel dim first - pa_data, pa_sr = paddleaudio.load( + pa_data, pa_sr = paddlespeech.audio.load( self.files[1], mono=False, normal=False, dtype='float64') self.assertEqual(sf_data.dtype, pa_data.dtype) @@ -49,7 +49,7 @@ class TestIO(BackendTest): pa_tmp_file = 'pa_tmp.wav' sf.write(sf_tmp_file, waveform, sr) - paddleaudio.save(waveform, sr, pa_tmp_file) + paddlespeech.audio.save(waveform, sr, pa_tmp_file) self.assertTrue(filecmp.cmp(sf_tmp_file, pa_tmp_file)) for file in [sf_tmp_file, pa_tmp_file]: @@ -62,7 +62,7 @@ class TestIO(BackendTest): pa_tmp_file = 'pa_tmp.wav' sf.write(sf_tmp_file, waveform.T, sr) - paddleaudio.save(waveform.T, sr, pa_tmp_file) + paddlespeech.audio.save(waveform.T, sr, pa_tmp_file) self.assertTrue(filecmp.cmp(sf_tmp_file, pa_tmp_file)) for file in [sf_tmp_file, pa_tmp_file]: diff --git a/audio/tests/features/__init__.py b/tests/unit/audio/features/__init__.py similarity index 100% rename from audio/tests/features/__init__.py rename to tests/unit/audio/features/__init__.py diff --git a/audio/tests/features/base.py b/tests/unit/audio/features/base.py similarity index 97% rename from audio/tests/features/base.py rename to tests/unit/audio/features/base.py index 476f6b8e..6d59f72b 100644 --- 
a/audio/tests/features/base.py +++ b/tests/unit/audio/features/base.py @@ -17,7 +17,8 @@ import urllib.request import numpy as np import paddle -from paddleaudio import load + +from paddlespeech.audio import load wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' diff --git a/audio/tests/features/test_istft.py b/tests/unit/audio/features/test_istft.py similarity index 96% rename from audio/tests/features/test_istft.py rename to tests/unit/audio/features/test_istft.py index 9cf8cdd6..f1e6e4e3 100644 --- a/audio/tests/features/test_istft.py +++ b/tests/unit/audio/features/test_istft.py @@ -15,9 +15,9 @@ import unittest import numpy as np import paddle -from paddleaudio.functional.window import get_window from .base import FeatTest +from paddlespeech.audio.functional.window import get_window from paddlespeech.s2t.transform.spectrogram import IStft from paddlespeech.s2t.transform.spectrogram import Stft diff --git a/audio/tests/features/test_kaldi.py b/tests/unit/audio/features/test_kaldi.py similarity index 87% rename from audio/tests/features/test_kaldi.py rename to tests/unit/audio/features/test_kaldi.py index 00a576f6..2b0ece89 100644 --- a/audio/tests/features/test_kaldi.py +++ b/tests/unit/audio/features/test_kaldi.py @@ -15,10 +15,10 @@ import unittest import numpy as np import paddle -import paddleaudio import torch import torchaudio +import paddlespeech.audio from .base import FeatTest @@ -40,17 +40,17 @@ class TestKaldi(FeatTest): self.window_size, periodic=False, dtype=eval(f'torch.{self.dtype}')).pow(0.85) - p_hann_window = paddleaudio.functional.window.get_window( + p_hann_window = paddlespeech.audio.functional.window.get_window( 'hann', self.window_size, fftbins=False, dtype=eval(f'paddle.{self.dtype}')) - p_hamm_window = paddleaudio.functional.window.get_window( + p_hamm_window = paddlespeech.audio.functional.window.get_window( 'hamming', self.window_size, fftbins=False, dtype=eval(f'paddle.{self.dtype}')) - p_povey_window = 
paddleaudio.functional.window.get_window( + p_povey_window = paddlespeech.audio.functional.window.get_window( 'hann', self.window_size, fftbins=False, @@ -63,7 +63,7 @@ class TestKaldi(FeatTest): def test_fbank(self): ta_features = torchaudio.compliance.kaldi.fbank( torch.from_numpy(self.waveform.astype(self.dtype))) - pa_features = paddleaudio.compliance.kaldi.fbank( + pa_features = paddlespeech.audio.compliance.kaldi.fbank( paddle.to_tensor(self.waveform.astype(self.dtype))) np.testing.assert_array_almost_equal( ta_features, pa_features, decimal=4) @@ -71,7 +71,7 @@ class TestKaldi(FeatTest): def test_mfcc(self): ta_features = torchaudio.compliance.kaldi.mfcc( torch.from_numpy(self.waveform.astype(self.dtype))) - pa_features = paddleaudio.compliance.kaldi.mfcc( + pa_features = paddlespeech.audio.compliance.kaldi.mfcc( paddle.to_tensor(self.waveform.astype(self.dtype))) np.testing.assert_array_almost_equal( ta_features, pa_features, decimal=4) diff --git a/audio/tests/features/test_librosa.py b/tests/unit/audio/features/test_librosa.py similarity index 89% rename from audio/tests/features/test_librosa.py rename to tests/unit/audio/features/test_librosa.py index a1d3e840..ffdec3e7 100644 --- a/audio/tests/features/test_librosa.py +++ b/tests/unit/audio/features/test_librosa.py @@ -16,10 +16,10 @@ import unittest import librosa import numpy as np import paddle -import paddleaudio -from paddleaudio.functional.window import get_window +import paddlespeech.audio from .base import FeatTest +from paddlespeech.audio.functional.window import get_window class TestLibrosa(FeatTest): @@ -117,7 +117,7 @@ class TestLibrosa(FeatTest): htk=False, norm='slaney', dtype=self.waveform.dtype, ) - feature_compliance = paddleaudio.compliance.librosa.compute_fbank_matrix( + feature_compliance = paddlespeech.audio.compliance.librosa.compute_fbank_matrix( sr=self.sr, n_fft=self.n_fft, n_mels=self.n_mels, @@ -127,7 +127,7 @@ class TestLibrosa(FeatTest): norm='slaney', 
dtype=self.waveform.dtype, ) x = paddle.to_tensor(self.waveform) - feature_functional = paddleaudio.functional.compute_fbank_matrix( + feature_functional = paddlespeech.audio.functional.compute_fbank_matrix( sr=self.sr, n_fft=self.n_fft, n_mels=self.n_mels, @@ -156,8 +156,8 @@ class TestLibrosa(FeatTest): n_mels=self.n_mels, fmin=self.fmin) - # paddleaudio.compliance.librosa: - feature_compliance = paddleaudio.compliance.librosa.melspectrogram( + # paddlespeech.audio.compliance.librosa: + feature_compliance = paddlespeech.audio.compliance.librosa.melspectrogram( x=self.waveform, sr=self.sr, window_size=self.n_fft, @@ -166,10 +166,10 @@ class TestLibrosa(FeatTest): fmin=self.fmin, to_db=False) - # paddleaudio.features.layer + # paddlespeech.audio.features.layer x = paddle.to_tensor( self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim. - feature_extractor = paddleaudio.features.MelSpectrogram( + feature_extractor = paddlespeech.audio.features.MelSpectrogram( sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length, @@ -198,8 +198,8 @@ class TestLibrosa(FeatTest): fmin=self.fmin) feature_librosa = librosa.power_to_db(feature_librosa, top_db=None) - # paddleaudio.compliance.librosa: - feature_compliance = paddleaudio.compliance.librosa.melspectrogram( + # paddlespeech.audio.compliance.librosa: + feature_compliance = paddlespeech.audio.compliance.librosa.melspectrogram( x=self.waveform, sr=self.sr, window_size=self.n_fft, @@ -207,10 +207,10 @@ class TestLibrosa(FeatTest): n_mels=self.n_mels, fmin=self.fmin) - # paddleaudio.features.layer + # paddlespeech.audio.features.layer x = paddle.to_tensor( self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim. 
- feature_extractor = paddleaudio.features.LogMelSpectrogram( + feature_extractor = paddlespeech.audio.features.LogMelSpectrogram( sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length, @@ -243,8 +243,8 @@ class TestLibrosa(FeatTest): n_mels=self.n_mels, fmin=self.fmin) - # paddleaudio.compliance.librosa: - feature_compliance = paddleaudio.compliance.librosa.mfcc( + # paddlespeech.audio.compliance.librosa: + feature_compliance = paddlespeech.audio.compliance.librosa.mfcc( x=self.waveform, sr=self.sr, n_mfcc=self.n_mfcc, @@ -257,10 +257,10 @@ class TestLibrosa(FeatTest): fmin=self.fmin, top_db=self.top_db) - # paddleaudio.features.layer + # paddlespeech.audio.features.layer x = paddle.to_tensor( self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim. - feature_extractor = paddleaudio.features.MFCC( + feature_extractor = paddlespeech.audio.features.MFCC( sr=self.sr, n_mfcc=self.n_mfcc, n_fft=self.n_fft, diff --git a/audio/tests/features/test_log_melspectrogram.py b/tests/unit/audio/features/test_log_melspectrogram.py similarity index 90% rename from audio/tests/features/test_log_melspectrogram.py rename to tests/unit/audio/features/test_log_melspectrogram.py index 0383c2b8..59eb73e8 100644 --- a/audio/tests/features/test_log_melspectrogram.py +++ b/tests/unit/audio/features/test_log_melspectrogram.py @@ -15,8 +15,8 @@ import unittest import numpy as np import paddle -import paddleaudio +import paddlespeech.audio from .base import FeatTest from paddlespeech.s2t.transform.spectrogram import LogMelSpectrogram @@ -33,8 +33,7 @@ class TestLogMelSpectrogram(FeatTest): ps_res = ps_melspect(self.waveform.T).squeeze(1).T x = paddle.to_tensor(self.waveform) - # paddlespeech.s2t的特征存在幅度谱和功率谱滥用的情况 - ps_melspect = paddleaudio.features.LogMelSpectrogram( + ps_melspect = paddlespeech.audio.features.LogMelSpectrogram( self.sr, self.n_fft, self.hop_length, diff --git a/audio/tests/features/test_spectrogram.py b/tests/unit/audio/features/test_spectrogram.py similarity 
index 93% rename from audio/tests/features/test_spectrogram.py rename to tests/unit/audio/features/test_spectrogram.py index 1774fe61..7d908a7e 100644 --- a/audio/tests/features/test_spectrogram.py +++ b/tests/unit/audio/features/test_spectrogram.py @@ -15,8 +15,8 @@ import unittest import numpy as np import paddle -import paddleaudio +import paddlespeech.audio from .base import FeatTest from paddlespeech.s2t.transform.spectrogram import Spectrogram @@ -31,7 +31,7 @@ class TestSpectrogram(FeatTest): ps_res = ps_spect(self.waveform.T).squeeze(1).T # Magnitude x = paddle.to_tensor(self.waveform) - pa_spect = paddleaudio.features.Spectrogram( + pa_spect = paddlespeech.audio.features.Spectrogram( self.n_fft, self.hop_length, power=1.0) pa_res = pa_spect(x).squeeze(0).numpy() diff --git a/audio/tests/features/test_stft.py b/tests/unit/audio/features/test_stft.py similarity index 95% rename from audio/tests/features/test_stft.py rename to tests/unit/audio/features/test_stft.py index 58792ffe..03448ca8 100644 --- a/audio/tests/features/test_stft.py +++ b/tests/unit/audio/features/test_stft.py @@ -15,9 +15,9 @@ import unittest import numpy as np import paddle -from paddleaudio.functional.window import get_window from .base import FeatTest +from paddlespeech.audio.functional.window import get_window from paddlespeech.s2t.transform.spectrogram import Stft