Add paddlespeech.cls and esc50 example.

pull/1027/head
KP 3 years ago
parent b12ae34ef1
commit 2c531d78ac

@ -0,0 +1,13 @@
#!/bin/bash
# Run inference on a single audio file with a saved checkpoint.
#
# Usage: infer.sh <device> <audio_file> <ckpt_dir> <feat_backend>
#   device        forwarded to predict.py as --device
#   audio_file    forwarded to predict.py as --wav
#   ckpt_dir      directory that contains model.pdparams
#   feat_backend  forwarded to predict.py as --feat_backend
#
# Environment:
#   BIN_DIR       directory that contains predict.py (must be set)

if [ "$#" -lt 4 ]; then
  echo "Usage: $0 <device> <audio_file> <ckpt_dir> <feat_backend>" >&2
  exit 1
fi

device=$1
audio_file=$2
ckpt_dir=$3
feat_backend=$4

# Every expansion is quoted so that paths containing whitespace reach
# predict.py as a single argument; ${BIN_DIR:?} aborts with a message instead
# of silently running "/predict.py" when BIN_DIR is unset or empty.
python3 "${BIN_DIR:?BIN_DIR must be set to the directory containing predict.py}/predict.py" \
  --device "${device}" \
  --wav "${audio_file}" \
  --feat_backend "${feat_backend}" \
  --top_k 10 \
  --checkpoint "${ckpt_dir}/model.pdparams"

@ -0,0 +1,27 @@
#!/bin/bash
# Launch training, either through paddle.distributed.launch (ngpu > 1) or as
# a single python3 process.
#
# Usage: train.sh <ngpu> <device> <feat_backend>
#   ngpu          integer; values greater than 1 select the distributed launcher
#   device        forwarded to train.py as --device (single-process run only)
#   feat_backend  forwarded to train.py as --feat_backend
#
# Environment:
#   BIN_DIR               directory that contains train.py (must be set)
#   CUDA_VISIBLE_DEVICES  forwarded to the launcher as --gpus when ngpu > 1

if [ "$#" -lt 3 ]; then
  echo "Usage: $0 <ngpu> <device> <feat_backend>" >&2
  exit 1
fi

ngpu=$1
device=$2
feat_backend=$3

num_epochs=50
batch_size=16
ckpt_dir=./checkpoint
save_freq=10

# Options shared by both launch modes, kept in an array so each value stays a
# single word regardless of its content.
train_args=(
  --epochs "${num_epochs}"
  --feat_backend "${feat_backend}"
  --batch_size "${batch_size}"
  --checkpoint_dir "${ckpt_dir}"
  --save_freq "${save_freq}"
)

train_script="${BIN_DIR:?BIN_DIR must be set to the directory containing train.py}/train.py"

if [ "${ngpu}" -gt 1 ]; then
  # Without the :? guard an unset CUDA_VISIBLE_DEVICES would make --gpus
  # consume the script path as its value.
  python3 -m paddle.distributed.launch \
    --gpus "${CUDA_VISIBLE_DEVICES:?CUDA_VISIBLE_DEVICES must be set for multi-GPU training}" \
    "${train_script}" \
    "${train_args[@]}"
else
  python3 "${train_script}" \
    --device "${device}" \
    "${train_args[@]}"
fi

@ -1,3 +1,4 @@
#!/bin/bash
# MAIN_ROOT is the canonical path of the directory three levels above the
# current working directory. The path is quoted so a working directory that
# contains whitespace is passed to realpath as one argument.
MAIN_ROOT=$(realpath "${PWD}/../../../")
export MAIN_ROOT
# Put MAIN_ROOT and its utils/ subdirectory ahead of the existing PATH entries.
export PATH="${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}"
@ -8,4 +9,5 @@ export PYTHONDONTWRITEBYTECODE=1
# Python runtime settings: UTF-8 standard streams, and MAIN_ROOT placed ahead
# of any existing module search path entries.
PYTHONIOENCODING=UTF-8
PYTHONPATH="${MAIN_ROOT}:${PYTHONPATH}"
# Add /usr/local/lib/ after any existing shared-library search path entries.
LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib/"
# MODEL stays a plain shell variable; only the derived BIN_DIR is exported.
MODEL=PANNs
BIN_DIR="${MAIN_ROOT}/paddlespeech/cls/exps/${MODEL}"
export PYTHONIOENCODING PYTHONPATH LD_LIBRARY_PATH BIN_DIR

@ -11,41 +11,17 @@ fi
stage=$1
stop_stage=100
num_epochs=50
batch_size=16
ckpt_dir=./checkpoint
save_freq=10
feat_backend=numpy
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
if [ ${ngpu} -gt 1 ]; then
python -m paddle.distributed.launch --gpus $CUDA_VISIBLE_DEVICES local/train.py \
--epochs ${num_epochs} \
--feat_backend ${feat_backend} \
--batch_size ${batch_size} \
--checkpoint_dir ${ckpt_dir} \
--save_freq ${save_freq}
else
python local/train.py \
--device ${device} \
--epochs ${num_epochs} \
--feat_backend ${feat_backend} \
--batch_size ${batch_size} \
--checkpoint_dir ${ckpt_dir} \
--save_freq ${save_freq}
fi
./local/train.sh ${ngpu} ${device} ${feat_backend} || exit -1
fi
audio_file=~/cat.wav
ckpt=./checkpoint/epoch_50/model.pdparams
ckpt_dir=./checkpoint/epoch_50
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
python local/predict.py \
--device ${device} \
--wav ${audio_file} \
--feat_backend ${feat_backend} \
--top_k 10 \
--checkpoint ${ckpt}
./local/infer.sh ${device} ${audio_file} ${ckpt_dir} ${feat_backend} || exit -1
fi
exit 0

@ -0,0 +1,15 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .backends import *
from .features import *

@ -17,7 +17,6 @@ from typing import List
from paddle.framework import load as load_state_dict
from paddle.utils import download
from pathos.multiprocessing import ProcessPool
from .log import logger
@ -32,27 +31,18 @@ def decompress(file: str):
download._decompress(file)
def download_and_decompress(archives: List[Dict[str, str]],
path: str,
n_workers: int=0):
def download_and_decompress(archives: List[Dict[str, str]], path: str):
"""
Download archieves and decompress to specific path.
"""
if not os.path.isdir(path):
os.makedirs(path)
if n_workers <= 0:
for archive in archives:
assert 'url' in archive and 'md5' in archive, \
'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archieve.keys())}'
for archive in archives:
assert 'url' in archive and 'md5' in archive, \
'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archieve.keys())}'
download.get_path_from_url(archive['url'], path, archive['md5'])
else:
pool = ProcessPool(nodes=n_workers)
pool.imap(download.get_path_from_url, [_['url'] for _ in archives],
[path] * len(archives), [_['md5'] for _ in archives])
pool.close()
pool.join()
download.get_path_from_url(archive['url'], path, archive['md5'])
def load_state_dict_from_url(url: str, path: str, md5: str=None):

@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
'''
This module is used to store environmental variables in PaddleSpeech.
PPSPEECH_HOME --> the root directory for storing PaddleSpeech related data. Default to ~/.paddlespeech. Users can change the
default value through the PPSPEECH_HOME environment variable.
This module is used to store environmental variables in PaddleAudio.
PPAUDIO_HOME --> the root directory for storing PaddleAudio related data. Default to ~/.paddleaudio. Users can change the
default value through the PPAUDIO_HOME environment variable.
MODEL_HOME --> Store model files.
DATA_HOME --> Store automatically downloaded datasets.
'''
@ -25,29 +25,29 @@ def _get_user_home():
return os.path.expanduser('~')
def _get_package_home():
if 'PPSPEECH_HOME' in os.environ:
home_path = os.environ['PPSPEECH_HOME']
def _get_ppaudio_home():
if 'PPAUDIO_HOME' in os.environ:
home_path = os.environ['PPAUDIO_HOME']
if os.path.exists(home_path):
if os.path.isdir(home_path):
return home_path
else:
raise RuntimeError(
'The environment variable PPSPEECH_HOME {} is not a directory.'.
'The environment variable PPAUDIO_HOME {} is not a directory.'.
format(home_path))
else:
return home_path
return os.path.join(_get_user_home(), '.paddlespeech')
return os.path.join(_get_user_home(), '.paddleaudio')
def _get_sub_home(directory):
home = os.path.join(_get_package_home(), directory)
home = os.path.join(_get_ppaudio_home(), directory)
if not os.path.exists(home):
os.makedirs(home)
return home
USER_HOME = _get_user_home()
PPSPEECH_HOME = _get_package_home()
MODEL_HOME = _get_sub_home('pretrained_models')
PPAUDIO_HOME = _get_ppaudio_home()
MODEL_HOME = _get_sub_home('models')
DATA_HOME = _get_sub_home('datasets')

@ -55,13 +55,13 @@ log_config = {
class Logger(object):
'''
Deafult logger in PaddleSpeech
Deafult logger in PaddleAudio
Args:
name(str) : Logger name, default is 'PaddleSpeech'
name(str) : Logger name, default is 'PaddleAudio'
'''
def __init__(self, name: str=None):
name = 'PaddleSpeech' if not name else name
name = 'PaddleAudio' if not name else name
self.logger = logging.getLogger(name)
for key, conf in log_config.items():

@ -11,5 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .backends import *
from .features import *

@ -15,10 +15,10 @@ import argparse
import os
import paddle
from model import SoundClassifier
from paddlespeech.cls.datasets import ESC50
from paddlespeech.cls.models.panns import cnn14
from .model import SoundClassifier
from .panns import cnn14
from paddleaudio.datasets import ESC50
# yapf: disable
parser = argparse.ArgumentParser(__doc__)

@ -16,8 +16,8 @@ import os
import paddle.nn as nn
import paddle.nn.functional as F
from ..utils.download import load_state_dict_from_url
from ..utils.env import MODEL_HOME
from paddleaudio.utils.download import load_state_dict_from_url
from paddleaudio.utils.env import MODEL_HOME
__all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6']

@ -17,12 +17,12 @@ import numpy as np
import paddle
import paddle.nn.functional as F
from model import SoundClassifier
from panns import cnn14
from paddlespeech.cls.backends import load as load_audio
from paddlespeech.cls.datasets import ESC50
from paddlespeech.cls.features import LogMelSpectrogram
from paddlespeech.cls.features import melspectrogram
from paddlespeech.cls.models.panns import cnn14
from paddleaudio.backends import load as load_audio
from paddleaudio.datasets import ESC50
from paddleaudio.features import LogMelSpectrogram
from paddleaudio.features import melspectrogram
# yapf: disable
parser = argparse.ArgumentParser(__doc__)

@ -16,12 +16,12 @@ import os
import paddle
from model import SoundClassifier
from panns import cnn14
from paddlespeech.cls.datasets import ESC50
from paddlespeech.cls.features import LogMelSpectrogram
from paddlespeech.cls.models.panns import cnn14
from paddlespeech.cls.utils import logger
from paddlespeech.cls.utils import Timer
from paddleaudio.datasets import ESC50
from paddleaudio.features import LogMelSpectrogram
from paddleaudio.utils import logger
from paddleaudio.utils import Timer
# yapf: disable
parser = argparse.ArgumentParser(__doc__)

@ -0,0 +1,13 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@ -1,4 +1,3 @@
colorlog
ConfigArgParse
coverage
distro
@ -19,10 +18,10 @@ matplotlib
nara_wpe
nltk
numba
paddleaudio
paddlespeech_ctcdecoders
paddlespeech_feat
pandas
pathos
phkit
Pillow
praatio~=4.1

@ -173,7 +173,7 @@ setup_info = dict(
# Package info
packages=find_packages(exclude=('utils', 'tests', 'tests.*', 'examples*',
'third_party*', 'tools*')),
'paddleaudio*', 'third_party*', 'tools*')),
zip_safe=True,
classifiers=[
'Development Status :: 3 - Alpha',

@ -0,0 +1,41 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import setuptools

# set the version here
version = '0.1.0a'

# Packaging metadata for the ``paddleaudio`` distribution.
setuptools.setup(
    name="paddleaudio",
    version=version,
    author="",
    author_email="",
    description="PaddleAudio, in development",
    long_description="",
    long_description_content_type="text/markdown",
    url="",
    # Only package trees whose name starts with "paddleaudio" are collected.
    packages=setuptools.find_packages(include=['paddleaudio*']),
    classifiers=[
        "Programming Language :: Python :: 3",
        # Kept in agreement with the Apache License 2.0 header of this file
        # (the previous value declared the MIT License).
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.6',
    install_requires=[
        'numpy >= 1.15.0',
        'scipy >= 1.0.0',
        'resampy >= 0.2.2',
        'soundfile >= 0.9.0',
        'colorlog',
    ], )
Loading…
Cancel
Save