Refactor paddleaudio to paddlespeech.audio

pull/2007/head
KP 2 years ago
parent 8641608f08
commit bf056c013d

@ -52,7 +52,7 @@ pull_request_rules:
add: ["T2S"]
- name: "auto add label=Audio"
conditions:
- files~=^paddleaudio/
- files~=^paddlespeech/audio/
actions:
label:
add: ["Audio"]
@ -100,7 +100,7 @@ pull_request_rules:
add: ["README"]
- name: "auto add label=Documentation"
conditions:
- files~=^(docs/|CHANGELOG.md|paddleaudio/CHANGELOG.md)
- files~=^(docs/|CHANGELOG.md)
actions:
label:
add: ["Documentation"]

2
audio/.gitignore vendored

@ -1,2 +0,0 @@
.eggs
*.wav

@ -1,9 +0,0 @@
# Changelog
Date: 2022-3-15, Author: Xiaojie Chen.
- kaldi and librosa mfcc, fbank, spectrogram.
- unit test and benchmark.
Date: 2022-2-25, Author: Hui Zhang.
- Refactor architecture.
- dtw distance and mcd style dtw.

@ -1,7 +0,0 @@
# PaddleAudio
PaddleAudio is an audio library for PaddlePaddle.
## Install
`pip install .`

@ -1,19 +0,0 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
# SPHINXOPTS is appended to every sphinx-build invocation below (empty by default).
SPHINXOPTS =
SPHINXBUILD = sphinx-build
# SOURCEDIR and BUILDDIR are passed to sphinx-build as the two positional
# arguments that follow the "-M <target>" option.
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# "help" and "Makefile" are declared phony so make does not treat them as files.
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

@ -1,24 +0,0 @@
# Build docs for PaddleAudio
Execute the following steps in **current directory**.
## 1. Install
`pip install Sphinx sphinx_rtd_theme`
## 2. Generate API docs
Generate API docs from doc string.
`sphinx-apidoc -fMeT -o source ../paddleaudio ../paddleaudio/utils --templatedir source/_templates`
## 3. Build
`sphinx-build source _html`
## 4. Preview
Open `_html/index.html` for page preview.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.9 KiB

@ -1,35 +0,0 @@
@ECHO OFF
REM Switch to the directory that contains this script; undone by popd at :end.
pushd %~dp0
REM Command file for Sphinx documentation
REM Fall back to "sphinx-build" when SPHINXBUILD is not set in the environment.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
REM With no target argument, just show the Sphinx help.
if "%1" == "" goto help
REM Probe the executable with all output discarded; errorlevel 9009 leads to
REM the "command was not found" message below and exit code 1.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
REM Forward the requested target to Sphinx "make mode".
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
:end
popd

@ -1,60 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
This module is used to store environmental variables in PaddleAudio.
PPAUDIO_HOME --> the root directory for storing PaddleAudio related data. Default to ~/.paddleaudio. Users can change the
default value through the PPAUDIO_HOME environment variable.
MODEL_HOME --> Store model files.
DATA_HOME --> Store automatically downloaded datasets.
'''
import os
__all__ = [
'USER_HOME',
'PPAUDIO_HOME',
'MODEL_HOME',
'DATA_HOME',
]
def _get_user_home():
    """Return the current user's home directory (the expansion of ``~``)."""
    home_dir = os.path.expanduser('~')
    return home_dir
def _get_ppaudio_home():
    """Resolve the PaddleAudio root directory.

    Returns:
        The value of the ``PPAUDIO_HOME`` environment variable when it is
        set (whether or not the path exists yet), otherwise
        ``<user home>/.paddleaudio``.

    Raises:
        RuntimeError: ``PPAUDIO_HOME`` points at an existing path that is
            not a directory.
    """
    configured = os.environ.get('PPAUDIO_HOME')
    if configured is None:
        # No override: fall back to the default location under the user home.
        return os.path.join(_get_user_home(), '.paddleaudio')

    # A path that does not exist yet is accepted as-is; only an existing
    # non-directory is rejected.
    if os.path.exists(configured) and not os.path.isdir(configured):
        raise RuntimeError(
            'The environment variable PPAUDIO_HOME {} is not a directory.'.
            format(configured))
    return configured
def _get_sub_home(directory):
    """Return ``directory`` joined onto the PaddleAudio home, creating it if missing.

    Args:
        directory: Name of the sub-directory (e.g. ``'models'``).

    Returns:
        The joined path. When nothing exists at that path, the directory
        (and any missing parents) is created before returning.
    """
    home = os.path.join(_get_ppaudio_home(), directory)
    if not os.path.exists(home):
        # exist_ok guards against another process creating the directory
        # between the existence check above and this call.
        os.makedirs(home, exist_ok=True)
    return home
# All four paths are resolved once, when this module is imported.
USER_HOME = _get_user_home()
PPAUDIO_HOME = _get_ppaudio_home()
# _get_sub_home creates the directory when it does not exist, so importing
# this module may create these two directories on disk.
MODEL_HOME = _get_sub_home('models')
DATA_HOME = _get_sub_home('datasets')

@ -1,99 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import os
import setuptools
from setuptools.command.install import install
from setuptools.command.test import test
# set the version here
VERSION = '0.0.0'
# Inspired by the example at https://pytest.org/latest/goodpractises.html
class TestCommand(test):
    """``setup.py test`` command: run the benchmark files, then the nose suite."""

    def finalize_options(self):
        """Finalize the base options, then clear ``test_args`` and enable ``test_suite``."""
        test.finalize_options(self)
        self.test_args = []
        self.test_suite = True

    def run(self):
        """Run the benchmarks first, then the regular test command."""
        self.run_benchmark()
        super().run()

    def run_tests(self):
        """Run the ``tests`` directory through nose."""
        # Run nose ensuring that argv simulates running nosetests directly
        import nose
        nose.run_exit(argv=['nosetests', '-w', 'tests'])

    def run_benchmark(self):
        """Invoke ``pytest`` once per file matching ``tests/benchmark/*py``."""
        # Imported locally so the block does not depend on a new module-level import.
        import subprocess
        for benchmark_item in glob.glob('tests/benchmark/*py'):
            # Argument list instead of a shell string: paths containing spaces
            # or shell metacharacters are passed to pytest verbatim. As before,
            # the exit status is ignored so one failing benchmark does not stop
            # the remaining ones.
            subprocess.run(['pytest', benchmark_item])
class InstallCommand(install):
    """Install command hook that delegates directly to the base ``install`` command."""

    def run(self):
        """Run the base class installation with no extra steps."""
        super().run()
def write_version_py(filename='paddleaudio/__init__.py'):
    """Append a ``__version__ = '<VERSION>'`` assignment to ``filename``.

    The assignment always starts on a line of its own. If the file's last
    line has no trailing newline, one is inserted first. Otherwise the stamp
    would be fused onto that line, and ``remove_version_py`` (which drops
    every line containing ``__version__``) would later delete it as well.

    Args:
        filename: File to stamp. It is created if it does not exist.
    """
    # Inspect the final byte to see whether the file currently ends mid-line.
    try:
        with open(filename, "rb") as f:
            f.seek(0, os.SEEK_END)
            if f.tell():
                f.seek(-1, os.SEEK_END)
                last_byte = f.read(1)
            else:
                last_byte = b""
    except FileNotFoundError:
        # Append mode below creates the file; nothing to separate from.
        last_byte = b""
    separator = "" if last_byte in (b"", b"\n") else "\n"
    with open(filename, "a") as f:
        f.write(f"{separator}__version__ = '{VERSION}'")
def remove_version_py(filename='paddleaudio/__init__.py'):
    """Rewrite ``filename`` in place, dropping every line that contains ``__version__``.

    Args:
        filename: File to clean. It must already exist.
    """
    with open(filename, "r") as src:
        kept = [text for text in src.readlines() if "__version__" not in text]
    with open(filename, "w") as dst:
        dst.writelines(kept)
# Stamp paddleaudio/__init__.py with the current version for the duration of
# the build. Any stale stamp from an earlier run is dropped first.
remove_version_py()
write_version_py()
try:
    setuptools.setup(
        name="paddleaudio",
        version=VERSION,
        author="",
        author_email="",
        description="PaddleAudio, in development",
        long_description="",
        long_description_content_type="text/markdown",
        url="",
        packages=setuptools.find_packages(include=['paddleaudio*']),
        classifiers=[
            "Programming Language :: Python :: 3",
            # Matches the Apache-2.0 license header at the top of this file.
            "License :: OSI Approved :: Apache Software License",
            "Operating System :: OS Independent",
        ],
        python_requires='>=3.6',
        install_requires=[
            'numpy >= 1.15.0', 'scipy >= 1.0.0', 'resampy >= 0.2.2',
            'soundfile >= 0.9.0', 'colorlog', 'pathos == 0.2.8'
        ],
        extras_require={
            'test': [
                'nose', 'librosa==0.8.1', 'soundfile==0.10.3.post1',
                'torchaudio==0.10.2', 'pytest-benchmark'
            ],
        },
        cmdclass={
            'install': InstallCommand,
            'test': TestCommand,
        }, )
finally:
    # Always undo the version stamp, even when setup() fails, so the source
    # tree is not left modified.
    remove_version_py()

@ -89,7 +89,7 @@ Then to start the system server, and it provides HTTP backend services.
Then start the server with Fastapi.
```bash
export PYTHONPATH=$PYTHONPATH:./src:../../paddleaudio
export PYTHONPATH=$PYTHONPATH:./src
python src/audio_search.py
```

@ -91,7 +91,7 @@ ffce340b3790 minio/minio:RELEASE.2020-12-03T00-03-10Z "/usr/bin/docker-ent…"
启动用 Fastapi 构建的服务
```bash
export PYTHONPATH=$PYTHONPATH:./src:../../paddleaudio
export PYTHONPATH=$PYTHONPATH:./src
python src/audio_search.py
```

@ -1,8 +1,8 @@
# Customize Dataset for Audio Classification
Following this tutorial you can customize your dataset for audio classification task by using `paddlespeech` and `paddleaudio`.
Following this tutorial you can customize your dataset for audio classification task by using `paddlespeech`.
A base class of classification dataset is `paddleaudio.dataset.AudioClassificationDataset`. To customize your dataset you should write a dataset class derived from `AudioClassificationDataset`.
The base class for classification datasets is `paddlespeech.audio.datasets.dataset.AudioClassificationDataset`. To customize your dataset, write a dataset class derived from `AudioClassificationDataset`.
Assume you have some wave files stored in your own directory. You should prepare a meta file containing the file paths and labels. For example, suppose its absolute path is `/PATH/TO/META_FILE.txt`:
```
@ -14,7 +14,7 @@ Assuming you have some wave files that stored in your own directory. You should
Here is an example to build your custom dataset in `custom_dataset.py`:
```python
from paddleaudio.datasets.dataset import AudioClassificationDataset
from paddlespeech.audio.datasets.dataset import AudioClassificationDataset
class CustomDataset(AudioClassificationDataset):
meta_file = '/PATH/TO/META_FILE.txt'
@ -48,7 +48,7 @@ class CustomDataset(AudioClassificationDataset):
Then you can build dataset and data loader from `CustomDataset`:
```python
import paddle
from paddleaudio.features import LogMelSpectrogram
from paddlespeech.audio.features import LogMelSpectrogram
from custom_dataset import CustomDataset

@ -1,5 +1,5 @@
data:
dataset: 'paddleaudio.datasets:ESC50'
dataset: 'paddlespeech.audio.datasets:ESC50'
num_classes: 50
train:
mode: 'train'

@ -2,7 +2,7 @@
###########################################
# Data #
###########################################
dataset: 'paddleaudio.datasets:HeySnips'
dataset: 'paddlespeech.audio.datasets:HeySnips'
data_dir: '/PATH/TO/DATA/hey_snips_research_6k_en_train_eval_clean_ter'
############################################

@ -14,9 +14,9 @@
import argparse
import paddle
from paddleaudio.datasets.voxceleb import VoxCeleb
from yacs.config import CfgNode
from paddlespeech.audio.datasets.voxceleb import VoxCeleb
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.io.augment import build_augment_pipeline
from paddlespeech.vector.training.seeding import seed_everything

@ -21,9 +21,9 @@ import os
from typing import List
import tqdm
from paddleaudio import load as load_audio
from yacs.config import CfgNode
from paddlespeech.audio import load as load_audio
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.utils.vector_utils import get_chunks

@ -22,9 +22,9 @@ import os
import random
import tqdm
from paddleaudio import load as load_audio
from yacs.config import CfgNode
from paddlespeech.audio import load as load_audio
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.utils.vector_utils import get_chunks

@ -11,13 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ...cli.utils import DATA_HOME
from ...cli.utils import MODEL_HOME
from .download import decompress
from .download import download_and_decompress
from .download import load_state_dict_from_url
from .env import DATA_HOME
from .env import MODEL_HOME
from .env import PPAUDIO_HOME
from .env import USER_HOME
from .error import ParameterError
from .log import Logger
from .log import logger

@ -27,6 +27,8 @@ from paddleaudio.features import LogMelSpectrogram
from ..executor import BaseExecutor
from ..log import logger
from ..utils import stats_wrapper
from paddlespeech.audio import load
from paddlespeech.audio.features import LogMelSpectrogram
__all__ = ['CLSExecutor']

@ -24,11 +24,11 @@ from typing import Any
from typing import Dict
import paddle
import paddleaudio
import requests
import yaml
from paddle.framework import load
import paddlespeech.audio
from . import download
from .entry import commands
try:
@ -190,6 +190,7 @@ def _get_sub_home(directory):
PPSPEECH_HOME = _get_paddlespcceh_home()
MODEL_HOME = _get_sub_home('models')
CONF_HOME = _get_sub_home('conf')
DATA_HOME = _get_sub_home('datasets')
def _md5(text: str):
@ -281,7 +282,7 @@ def _note_one_stat(cls_name, params={}):
if 'audio_file' in params:
try:
_, sr = paddleaudio.load(params['audio_file'])
_, sr = paddlespeech.audio.load(params['audio_file'])
except Exception:
sr = -1

@ -29,6 +29,8 @@ from yacs.config import CfgNode
from ..executor import BaseExecutor
from ..log import logger
from ..utils import stats_wrapper
from paddlespeech.audio.backends import load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.vector.io.batch import feature_normalize
from paddlespeech.vector.modules.sid_model import SpeakerIdetification

@ -16,11 +16,12 @@ import os
import numpy as np
from paddle import inference
from paddleaudio.backends import load as load_audio
from paddleaudio.datasets import ESC50
from paddleaudio.features import melspectrogram
from scipy.special import softmax
from paddlespeech.audio.backends import load as load_audio
from paddlespeech.audio.datasets import ESC50
from paddlespeech.audio.features import melspectrogram
# yapf: disable
parser = argparse.ArgumentParser()
parser.add_argument("--model_dir", type=str, required=True, default="./export", help="The directory to static model.")

@ -15,8 +15,8 @@ import argparse
import os
import paddle
from paddleaudio.datasets import ESC50
from paddlespeech.audio.datasets import ESC50
from paddlespeech.cls.models import cnn14
from paddlespeech.cls.models import SoundClassifier

@ -17,10 +17,10 @@ import os
import paddle
import paddle.nn.functional as F
import yaml
from paddleaudio.backends import load as load_audio
from paddleaudio.features import LogMelSpectrogram
from paddleaudio.utils import logger
from paddlespeech.audio.backends import load as load_audio
from paddlespeech.audio.features import LogMelSpectrogram
from paddlespeech.audio.utils import logger
from paddlespeech.cls.models import SoundClassifier
from paddlespeech.utils.dynamic_import import dynamic_import

@ -16,10 +16,10 @@ import os
import paddle
import yaml
from paddleaudio.features import LogMelSpectrogram
from paddleaudio.utils import logger
from paddleaudio.utils import Timer
from paddlespeech.audio.features import LogMelSpectrogram
from paddlespeech.audio.utils import logger
from paddlespeech.audio.utils import Timer
from paddlespeech.cls.models import SoundClassifier
from paddlespeech.utils.dynamic_import import dynamic_import

@ -15,8 +15,9 @@ import os
import paddle.nn as nn
import paddle.nn.functional as F
from paddleaudio.utils.download import load_state_dict_from_url
from paddleaudio.utils.env import MODEL_HOME
from paddlespeech.audio.utils.download import load_state_dict_from_url
from paddlespeech.audio.utils.env import MODEL_HOME
__all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6']

@ -14,10 +14,10 @@
import os
import paddle
from paddleaudio.utils import logger
from paddleaudio.utils import Timer
from yacs.config import CfgNode
from paddlespeech.audio.utils import logger
from paddlespeech.audio.utils import Timer
from paddlespeech.kws.exps.mdtc.collate import collate_features
from paddlespeech.kws.models.loss import max_pooling_loss
from paddlespeech.kws.models.mdtc import KWSModel

@ -14,10 +14,11 @@
"""Contains the audio featurizer class."""
import numpy as np
import paddle
import paddleaudio.compliance.kaldi as kaldi
from python_speech_features import delta
from python_speech_features import mfcc
import paddlespeech.audio.compliance.kaldi as kaldi
class AudioFeaturizer():
"""Audio featurizer, for extracting features from audio contents of

@ -15,9 +15,10 @@
import librosa
import numpy as np
import paddle
import paddleaudio.compliance.kaldi as kaldi
from python_speech_features import logfbank
import paddlespeech.audio.compliance.kaldi as kaldi
def stft(x,
n_fft,

@ -16,9 +16,9 @@ from collections import OrderedDict
import numpy as np
import paddle
from paddleaudio.backends import load as load_audio
from paddleaudio.compliance.librosa import melspectrogram
from paddlespeech.audio.backends import load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.cli.log import logger
from paddlespeech.cli.vector.infer import VectorExecutor
from paddlespeech.server.engine.base_engine import BaseEngine

@ -24,11 +24,11 @@ from typing import Any
from typing import Dict
import paddle
import paddleaudio
import requests
import yaml
from paddle.framework import load
import paddlespeech.audio
from .entry import client_commands
from .entry import server_commands
from paddlespeech.cli import download
@ -289,7 +289,7 @@ def _note_one_stat(cls_name, params={}):
if 'audio_file' in params:
try:
_, sr = paddleaudio.load(params['audio_file'])
_, sr = paddlespeech.audio.load(params['audio_file'])
except Exception:
sr = -1

@ -16,10 +16,10 @@ import os
import time
import paddle
from paddleaudio.backends import load as load_audio
from paddleaudio.compliance.librosa import melspectrogram
from yacs.config import CfgNode
from paddlespeech.audio.backends import load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.io.batch import feature_normalize
from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn

@ -18,10 +18,10 @@ import numpy as np
import paddle
from paddle.io import BatchSampler
from paddle.io import DataLoader
from paddleaudio.metric import compute_eer
from tqdm import tqdm
from yacs.config import CfgNode
from paddlespeech.audio.metric import compute_eer
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.io.batch import batch_feature_normalize
from paddlespeech.vector.io.dataset import CSVDataset

@ -20,9 +20,9 @@ import paddle
from paddle.io import BatchSampler
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from paddleaudio.compliance.librosa import melspectrogram
from yacs.config import CfgNode
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.io.augment import build_augment_pipeline
from paddlespeech.vector.io.augment import waveform_augment

@ -15,9 +15,9 @@ from dataclasses import dataclass
from dataclasses import fields
from paddle.io import Dataset
from paddleaudio import load as load_audio
from paddleaudio.compliance.librosa import melspectrogram
from paddlespeech.audio import load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.s2t.utils.log import Log
logger = Log(__name__).getlog()

@ -16,9 +16,10 @@ from dataclasses import dataclass
from dataclasses import fields
from paddle.io import Dataset
from paddleaudio import load as load_audio
from paddleaudio.compliance.librosa import melspectrogram
from paddleaudio.compliance.librosa import mfcc
from paddlespeech.audio import load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.audio.compliance.librosa import mfcc
@dataclass

@ -24,6 +24,7 @@ from setuptools import find_packages
from setuptools import setup
from setuptools.command.develop import develop
from setuptools.command.install import install
from setuptools.command.test import test
HERE = Path(os.path.abspath(os.path.dirname(__file__)))
@ -31,42 +32,13 @@ VERSION = '0.0.0'
COMMITID = 'none'
base = [
"editdistance",
"g2p_en",
"g2pM",
"h5py",
"inflect",
"jieba",
"jsonlines",
"kaldiio",
"librosa==0.8.1",
"loguru",
"matplotlib",
"nara_wpe",
"onnxruntime",
"pandas",
"paddleaudio",
"paddlenlp",
"paddlespeech_feat",
"praatio==5.0.0",
"pypinyin",
"pypinyin-dict",
"python-dateutil",
"pyworld",
"resampy==0.2.2",
"sacrebleu",
"scipy",
"sentencepiece~=0.1.96",
"soundfile~=0.10",
"textgrid",
"timer",
"tqdm",
"typeguard",
"visualdl",
"webrtcvad",
"yacs~=0.1.8",
"prettytable",
"zhon",
"editdistance", "g2p_en", "g2pM", "h5py", "inflect", "jieba", "jsonlines",
"kaldiio", "librosa==0.8.1", "loguru", "matplotlib", "nara_wpe",
"onnxruntime", "pandas", "paddlenlp", "paddlespeech_feat", "praatio==5.0.0",
"pypinyin", "pypinyin-dict", "python-dateutil", "pyworld", "resampy==0.2.2",
"sacrebleu", "scipy", "sentencepiece~=0.1.96", "soundfile~=0.10",
"textgrid", "timer", "tqdm", "typeguard", "visualdl", "webrtcvad",
"yacs~=0.1.8", "prettytable", "zhon", 'colorlog', 'pathos == 0.2.8'
]
server = [
@ -177,7 +149,19 @@ class InstallCommand(install):
install.run(self)
# cmd: python setup.py test
class TestCommand(test):
    """``setup.py test`` command that runs the ``tests`` directory through nose."""

    def finalize_options(self):
        """Finalize the base options, then clear ``test_args`` and enable ``test_suite``."""
        super().finalize_options()
        self.test_args = []
        self.test_suite = True

    def run_tests(self):
        """Hand control to nose with an argv that mimics a direct ``nosetests`` call."""
        # nose is imported here rather than at module level.
        import nose
        nose_argv = ['nosetests', '-w', 'tests']
        nose.run_exit(argv=nose_argv)
# cmd: python setup.py upload
class UploadCommand(Command):
description = "Build and publish the package."
user_options = []
@ -279,11 +263,13 @@ setup_info = dict(
"sphinx", "sphinx-rtd-theme", "numpydoc", "myst_parser",
"recommonmark>=0.5.0", "sphinx-markdown-tables", "sphinx-autobuild"
],
'test': ['nose', 'torchaudio==0.10.2'],
},
cmdclass={
'develop': DevelopCommand,
'install': InstallCommand,
'upload': UploadCommand,
'test': TestCommand,
},
# Package info

@ -15,7 +15,6 @@ Result:
========================================================================== test session starts ==========================================================================
platform linux -- Python 3.7.7, pytest-7.0.1, pluggy-1.0.0
benchmark: 3.4.1 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /ssd3/chenxiaojie06/PaddleSpeech/DeepSpeech/paddleaudio
plugins: typeguard-2.12.1, benchmark-3.4.1, anyio-3.5.0
collected 4 items

@ -17,15 +17,17 @@ import urllib.request
import librosa
import numpy as np
import paddle
import paddleaudio
import torch
import torchaudio
import paddlespeech.audio
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
if not os.path.isfile(os.path.basename(wav_url)):
urllib.request.urlretrieve(wav_url, os.path.basename(wav_url))
waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url)))
waveform, sr = paddlespeech.audio.load(
os.path.abspath(os.path.basename(wav_url)))
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)
@ -55,7 +57,7 @@ def enable_gpu_device():
paddle.set_device('gpu')
log_mel_extractor = paddleaudio.features.LogMelSpectrogram(
log_mel_extractor = paddlespeech.audio.features.LogMelSpectrogram(
**mel_conf, f_min=0.0, top_db=80.0, dtype=waveform_tensor.dtype)
@ -65,20 +67,20 @@ def log_melspectrogram():
def test_log_melspect_cpu(benchmark):
enable_cpu_device()
feature_paddleaudio = benchmark(log_melspectrogram)
feature_audio = benchmark(log_melspectrogram)
feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=3)
feature_librosa, feature_audio, decimal=3)
def test_log_melspect_gpu(benchmark):
enable_gpu_device()
feature_paddleaudio = benchmark(log_melspectrogram)
feature_audio = benchmark(log_melspectrogram)
feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=2)
feature_librosa, feature_audio, decimal=2)
mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram(
@ -102,11 +104,11 @@ def test_log_melspect_cpu_torchaudio(benchmark):
waveform_tensor_torch = waveform_tensor_torch.to('cpu')
amplitude_to_DB = amplitude_to_DB.to('cpu')
feature_paddleaudio = benchmark(log_melspectrogram_torchaudio)
feature_audio = benchmark(log_melspectrogram_torchaudio)
feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=3)
feature_librosa, feature_audio, decimal=3)
def test_log_melspect_gpu_torchaudio(benchmark):

@ -17,15 +17,17 @@ import urllib.request
import librosa
import numpy as np
import paddle
import paddleaudio
import torch
import torchaudio
import paddlespeech.audio
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
if not os.path.isfile(os.path.basename(wav_url)):
urllib.request.urlretrieve(wav_url, os.path.basename(wav_url))
waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url)))
waveform, sr = paddlespeech.audio.load(
os.path.abspath(os.path.basename(wav_url)))
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)
@ -55,7 +57,7 @@ def enable_gpu_device():
paddle.set_device('gpu')
mel_extractor = paddleaudio.features.MelSpectrogram(
mel_extractor = paddlespeech.audio.features.MelSpectrogram(
**mel_conf, f_min=0.0, dtype=waveform_tensor.dtype)
@ -65,18 +67,18 @@ def melspectrogram():
def test_melspect_cpu(benchmark):
enable_cpu_device()
feature_paddleaudio = benchmark(melspectrogram)
feature_audio = benchmark(melspectrogram)
feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=3)
feature_librosa, feature_audio, decimal=3)
def test_melspect_gpu(benchmark):
enable_gpu_device()
feature_paddleaudio = benchmark(melspectrogram)
feature_audio = benchmark(melspectrogram)
feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=3)
feature_librosa, feature_audio, decimal=3)
mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram(
@ -91,10 +93,10 @@ def test_melspect_cpu_torchaudio(benchmark):
global waveform_tensor_torch, mel_extractor_torchaudio
mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu')
waveform_tensor_torch = waveform_tensor_torch.to('cpu')
feature_paddleaudio = benchmark(melspectrogram_torchaudio)
feature_audio = benchmark(melspectrogram_torchaudio)
feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=3)
feature_librosa, feature_audio, decimal=3)
def test_melspect_gpu_torchaudio(benchmark):

@ -17,15 +17,17 @@ import urllib.request
import librosa
import numpy as np
import paddle
import paddleaudio
import torch
import torchaudio
import paddlespeech.audio
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
if not os.path.isfile(os.path.basename(wav_url)):
urllib.request.urlretrieve(wav_url, os.path.basename(wav_url))
waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url)))
waveform, sr = paddlespeech.audio.load(
os.path.abspath(os.path.basename(wav_url)))
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)
@ -64,7 +66,7 @@ def enable_gpu_device():
paddle.set_device('gpu')
mfcc_extractor = paddleaudio.features.MFCC(
mfcc_extractor = paddlespeech.audio.features.MFCC(
**mfcc_conf, f_min=0.0, dtype=waveform_tensor.dtype)
@ -74,18 +76,18 @@ def mfcc():
def test_mfcc_cpu(benchmark):
enable_cpu_device()
feature_paddleaudio = benchmark(mfcc)
feature_audio = benchmark(mfcc)
feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=3)
feature_librosa, feature_audio, decimal=3)
def test_mfcc_gpu(benchmark):
enable_gpu_device()
feature_paddleaudio = benchmark(mfcc)
feature_audio = benchmark(mfcc)
feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=3)
feature_librosa, feature_audio, decimal=3)
del mel_conf_torchaudio['sample_rate']
@ -103,10 +105,10 @@ def test_mfcc_cpu_torchaudio(benchmark):
mel_extractor_torchaudio = mfcc_extractor_torchaudio.to('cpu')
waveform_tensor_torch = waveform_tensor_torch.to('cpu')
feature_paddleaudio = benchmark(mfcc_torchaudio)
feature_audio = benchmark(mfcc_torchaudio)
feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=3)
feature_librosa, feature_audio, decimal=3)
def test_mfcc_gpu_torchaudio(benchmark):

@ -16,16 +16,16 @@ import os
import unittest
import numpy as np
import paddleaudio
import soundfile as sf
import paddlespeech.audio
from ..base import BackendTest
class TestIO(BackendTest):
def test_load_mono_channel(self):
sf_data, sf_sr = sf.read(self.files[0])
pa_data, pa_sr = paddleaudio.load(
pa_data, pa_sr = paddlespeech.audio.load(
self.files[0], normal=False, dtype='float64')
self.assertEqual(sf_data.dtype, pa_data.dtype)
@ -35,7 +35,7 @@ class TestIO(BackendTest):
def test_load_multi_channels(self):
sf_data, sf_sr = sf.read(self.files[1])
sf_data = sf_data.T # Channel dim first
pa_data, pa_sr = paddleaudio.load(
pa_data, pa_sr = paddlespeech.audio.load(
self.files[1], mono=False, normal=False, dtype='float64')
self.assertEqual(sf_data.dtype, pa_data.dtype)
@ -49,7 +49,7 @@ class TestIO(BackendTest):
pa_tmp_file = 'pa_tmp.wav'
sf.write(sf_tmp_file, waveform, sr)
paddleaudio.save(waveform, sr, pa_tmp_file)
paddlespeech.audio.save(waveform, sr, pa_tmp_file)
self.assertTrue(filecmp.cmp(sf_tmp_file, pa_tmp_file))
for file in [sf_tmp_file, pa_tmp_file]:
@ -62,7 +62,7 @@ class TestIO(BackendTest):
pa_tmp_file = 'pa_tmp.wav'
sf.write(sf_tmp_file, waveform.T, sr)
paddleaudio.save(waveform.T, sr, pa_tmp_file)
paddlespeech.audio.save(waveform.T, sr, pa_tmp_file)
self.assertTrue(filecmp.cmp(sf_tmp_file, pa_tmp_file))
for file in [sf_tmp_file, pa_tmp_file]:

@ -17,7 +17,8 @@ import urllib.request
import numpy as np
import paddle
from paddleaudio import load
from paddlespeech.audio import load
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'

@ -15,9 +15,9 @@ import unittest
import numpy as np
import paddle
from paddleaudio.functional.window import get_window
from .base import FeatTest
from paddlespeech.audio.functional.window import get_window
from paddlespeech.s2t.transform.spectrogram import IStft
from paddlespeech.s2t.transform.spectrogram import Stft

@ -15,10 +15,10 @@ import unittest
import numpy as np
import paddle
import paddleaudio
import torch
import torchaudio
import paddlespeech.audio
from .base import FeatTest
@ -40,17 +40,17 @@ class TestKaldi(FeatTest):
self.window_size, periodic=False,
dtype=eval(f'torch.{self.dtype}')).pow(0.85)
p_hann_window = paddleaudio.functional.window.get_window(
p_hann_window = paddlespeech.audio.functional.window.get_window(
'hann',
self.window_size,
fftbins=False,
dtype=eval(f'paddle.{self.dtype}'))
p_hamm_window = paddleaudio.functional.window.get_window(
p_hamm_window = paddlespeech.audio.functional.window.get_window(
'hamming',
self.window_size,
fftbins=False,
dtype=eval(f'paddle.{self.dtype}'))
p_povey_window = paddleaudio.functional.window.get_window(
p_povey_window = paddlespeech.audio.functional.window.get_window(
'hann',
self.window_size,
fftbins=False,
@ -63,7 +63,7 @@ class TestKaldi(FeatTest):
def test_fbank(self):
ta_features = torchaudio.compliance.kaldi.fbank(
torch.from_numpy(self.waveform.astype(self.dtype)))
pa_features = paddleaudio.compliance.kaldi.fbank(
pa_features = paddlespeech.audio.compliance.kaldi.fbank(
paddle.to_tensor(self.waveform.astype(self.dtype)))
np.testing.assert_array_almost_equal(
ta_features, pa_features, decimal=4)
@ -71,7 +71,7 @@ class TestKaldi(FeatTest):
def test_mfcc(self):
ta_features = torchaudio.compliance.kaldi.mfcc(
torch.from_numpy(self.waveform.astype(self.dtype)))
pa_features = paddleaudio.compliance.kaldi.mfcc(
pa_features = paddlespeech.audio.compliance.kaldi.mfcc(
paddle.to_tensor(self.waveform.astype(self.dtype)))
np.testing.assert_array_almost_equal(
ta_features, pa_features, decimal=4)

@ -16,10 +16,10 @@ import unittest
import librosa
import numpy as np
import paddle
import paddleaudio
from paddleaudio.functional.window import get_window
import paddlespeech.audio
from .base import FeatTest
from paddlespeech.audio.functional.window import get_window
class TestLibrosa(FeatTest):
@ -117,7 +117,7 @@ class TestLibrosa(FeatTest):
htk=False,
norm='slaney',
dtype=self.waveform.dtype, )
feature_compliance = paddleaudio.compliance.librosa.compute_fbank_matrix(
feature_compliance = paddlespeech.audio.compliance.librosa.compute_fbank_matrix(
sr=self.sr,
n_fft=self.n_fft,
n_mels=self.n_mels,
@ -127,7 +127,7 @@ class TestLibrosa(FeatTest):
norm='slaney',
dtype=self.waveform.dtype, )
x = paddle.to_tensor(self.waveform)
feature_functional = paddleaudio.functional.compute_fbank_matrix(
feature_functional = paddlespeech.audio.functional.compute_fbank_matrix(
sr=self.sr,
n_fft=self.n_fft,
n_mels=self.n_mels,
@ -156,8 +156,8 @@ class TestLibrosa(FeatTest):
n_mels=self.n_mels,
fmin=self.fmin)
# paddleaudio.compliance.librosa:
feature_compliance = paddleaudio.compliance.librosa.melspectrogram(
# paddlespeech.audio.compliance.librosa:
feature_compliance = paddlespeech.audio.compliance.librosa.melspectrogram(
x=self.waveform,
sr=self.sr,
window_size=self.n_fft,
@ -166,10 +166,10 @@ class TestLibrosa(FeatTest):
fmin=self.fmin,
to_db=False)
# paddleaudio.features.layer
# paddlespeech.audio.features.layer
x = paddle.to_tensor(
self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim.
feature_extractor = paddleaudio.features.MelSpectrogram(
feature_extractor = paddlespeech.audio.features.MelSpectrogram(
sr=self.sr,
n_fft=self.n_fft,
hop_length=self.hop_length,
@ -198,8 +198,8 @@ class TestLibrosa(FeatTest):
fmin=self.fmin)
feature_librosa = librosa.power_to_db(feature_librosa, top_db=None)
# paddleaudio.compliance.librosa:
feature_compliance = paddleaudio.compliance.librosa.melspectrogram(
# paddlespeech.audio.compliance.librosa:
feature_compliance = paddlespeech.audio.compliance.librosa.melspectrogram(
x=self.waveform,
sr=self.sr,
window_size=self.n_fft,
@ -207,10 +207,10 @@ class TestLibrosa(FeatTest):
n_mels=self.n_mels,
fmin=self.fmin)
# paddleaudio.features.layer
# paddlespeech.audio.features.layer
x = paddle.to_tensor(
self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim.
feature_extractor = paddleaudio.features.LogMelSpectrogram(
feature_extractor = paddlespeech.audio.features.LogMelSpectrogram(
sr=self.sr,
n_fft=self.n_fft,
hop_length=self.hop_length,
@ -243,8 +243,8 @@ class TestLibrosa(FeatTest):
n_mels=self.n_mels,
fmin=self.fmin)
# paddleaudio.compliance.librosa:
feature_compliance = paddleaudio.compliance.librosa.mfcc(
# paddlespeech.audio.compliance.librosa:
feature_compliance = paddlespeech.audio.compliance.librosa.mfcc(
x=self.waveform,
sr=self.sr,
n_mfcc=self.n_mfcc,
@ -257,10 +257,10 @@ class TestLibrosa(FeatTest):
fmin=self.fmin,
top_db=self.top_db)
# paddleaudio.features.layer
# paddlespeech.audio.features.layer
x = paddle.to_tensor(
self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim.
feature_extractor = paddleaudio.features.MFCC(
feature_extractor = paddlespeech.audio.features.MFCC(
sr=self.sr,
n_mfcc=self.n_mfcc,
n_fft=self.n_fft,

@ -15,8 +15,8 @@ import unittest
import numpy as np
import paddle
import paddleaudio
import paddlespeech.audio
from .base import FeatTest
from paddlespeech.s2t.transform.spectrogram import LogMelSpectrogram
@ -33,8 +33,7 @@ class TestLogMelSpectrogram(FeatTest):
ps_res = ps_melspect(self.waveform.T).squeeze(1).T
x = paddle.to_tensor(self.waveform)
# paddlespeech.s2t的特征存在幅度谱和功率谱滥用的情况
ps_melspect = paddleaudio.features.LogMelSpectrogram(
ps_melspect = paddlespeech.audio.features.LogMelSpectrogram(
self.sr,
self.n_fft,
self.hop_length,

@ -15,8 +15,8 @@ import unittest
import numpy as np
import paddle
import paddleaudio
import paddlespeech.audio
from .base import FeatTest
from paddlespeech.s2t.transform.spectrogram import Spectrogram
@ -31,7 +31,7 @@ class TestSpectrogram(FeatTest):
ps_res = ps_spect(self.waveform.T).squeeze(1).T # Magnitude
x = paddle.to_tensor(self.waveform)
pa_spect = paddleaudio.features.Spectrogram(
pa_spect = paddlespeech.audio.features.Spectrogram(
self.n_fft, self.hop_length, power=1.0)
pa_res = pa_spect(x).squeeze(0).numpy()

@ -15,9 +15,9 @@ import unittest
import numpy as np
import paddle
from paddleaudio.functional.window import get_window
from .base import FeatTest
from paddlespeech.audio.functional.window import get_window
from paddlespeech.s2t.transform.spectrogram import Stft
Loading…
Cancel
Save