Add paddlespeech.cls and esc50 example.

pull/1027/head
KP 3 years ago
parent b12ae34ef1
commit 2c531d78ac

@ -0,0 +1,13 @@
#!/bin/bash
# Run predict.py on a single audio file with a trained checkpoint.
#
# Usage: <this script> <device> <audio_file> <ckpt_dir> <feat_backend>
#   device        forwarded to predict.py as --device
#   audio_file    forwarded to predict.py as --wav
#   ckpt_dir      directory that contains model.pdparams
#   feat_backend  forwarded to predict.py as --feat_backend
#
# Environment:
#   BIN_DIR  directory that contains predict.py (required)
#
# The exit status is that of the python3 invocation.

# Fail early with a clear message instead of letting a missing argument
# shift the following flag into the wrong position.
if [ "$#" -lt 4 ]; then
  echo "Usage: $0 <device> <audio_file> <ckpt_dir> <feat_backend>" >&2
  exit 1
fi

device=$1
audio_file=$2
ckpt_dir=$3
feat_backend=$4

# Every expansion is quoted so paths containing spaces or glob characters
# reach predict.py as a single argument. ${BIN_DIR:?} aborts when BIN_DIR is
# unset or empty rather than silently resolving to /predict.py.
python3 "${BIN_DIR:?BIN_DIR must be set}/predict.py" \
  --device "${device}" \
  --wav "${audio_file}" \
  --feat_backend "${feat_backend}" \
  --top_k 10 \
  --checkpoint "${ckpt_dir}/model.pdparams"

@ -0,0 +1,27 @@
#!/bin/bash
# Launch train.py either through paddle.distributed.launch (ngpu > 1) or as a
# plain single-process run.
#
# Usage: <this script> <ngpu> <device> <feat_backend>
#   ngpu          number of GPUs; values greater than 1 select the
#                 distributed launcher
#   device        forwarded to train.py as --device (single-process run only)
#   feat_backend  forwarded to train.py as --feat_backend
#
# Environment:
#   BIN_DIR               directory that contains train.py (required)
#   CUDA_VISIBLE_DEVICES  forwarded to the launcher as --gpus (required when
#                         ngpu > 1)
#
# The exit status is that of the python3 invocation.

# Fail early with a clear message instead of passing empty flags downstream.
if [ "$#" -lt 3 ]; then
  echo "Usage: $0 <ngpu> <device> <feat_backend>" >&2
  exit 1
fi

ngpu=$1
device=$2
feat_backend=$3

num_epochs=50
batch_size=16
ckpt_dir=./checkpoint
save_freq=10

# ${BIN_DIR:?} aborts when BIN_DIR is unset or empty rather than silently
# resolving to /train.py.
train_script="${BIN_DIR:?BIN_DIR must be set}/train.py"

# Options shared by both launch modes, kept in one place so the two branches
# cannot drift apart.
common_args=(
  --epochs "${num_epochs}"
  --feat_backend "${feat_backend}"
  --batch_size "${batch_size}"
  --checkpoint_dir "${ckpt_dir}"
  --save_freq "${save_freq}"
)

if [ "${ngpu}" -gt 1 ]; then
  # Without the :? guard an unset CUDA_VISIBLE_DEVICES would make --gpus
  # consume the training script path as its value.
  python3 -m paddle.distributed.launch \
    --gpus "${CUDA_VISIBLE_DEVICES:?CUDA_VISIBLE_DEVICES must be set when ngpu > 1}" \
    "${train_script}" \
    "${common_args[@]}"
else
  python3 "${train_script}" \
    --device "${device}" \
    "${common_args[@]}"
fi

@ -1,3 +1,4 @@
#!/bin/bash
export MAIN_ROOT=`realpath ${PWD}/../../../` export MAIN_ROOT=`realpath ${PWD}/../../../`
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH} export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
@ -8,4 +9,5 @@ export PYTHONDONTWRITEBYTECODE=1
export PYTHONIOENCODING=UTF-8 export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/ MODEL=PANNs
export BIN_DIR=${MAIN_ROOT}/paddlespeech/cls/exps/${MODEL}

@ -11,41 +11,17 @@ fi
stage=$1 stage=$1
stop_stage=100 stop_stage=100
num_epochs=50
batch_size=16
ckpt_dir=./checkpoint
save_freq=10
feat_backend=numpy feat_backend=numpy
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
if [ ${ngpu} -gt 1 ]; then ./local/train.sh ${ngpu} ${device} ${feat_backend} || exit -1
python -m paddle.distributed.launch --gpus $CUDA_VISIBLE_DEVICES local/train.py \
--epochs ${num_epochs} \
--feat_backend ${feat_backend} \
--batch_size ${batch_size} \
--checkpoint_dir ${ckpt_dir} \
--save_freq ${save_freq}
else
python local/train.py \
--device ${device} \
--epochs ${num_epochs} \
--feat_backend ${feat_backend} \
--batch_size ${batch_size} \
--checkpoint_dir ${ckpt_dir} \
--save_freq ${save_freq}
fi
fi fi
audio_file=~/cat.wav audio_file=~/cat.wav
ckpt=./checkpoint/epoch_50/model.pdparams ckpt_dir=./checkpoint/epoch_50
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
python local/predict.py \ ./local/infer.sh ${device} ${audio_file} ${ckpt_dir} ${feat_backend} || exit -1
--device ${device} \
--wav ${audio_file} \
--feat_backend ${feat_backend} \
--top_k 10 \
--checkpoint ${ckpt}
fi fi
exit 0 exit 0

@ -0,0 +1,15 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .backends import *
from .features import *

@ -17,7 +17,6 @@ from typing import List
from paddle.framework import load as load_state_dict from paddle.framework import load as load_state_dict
from paddle.utils import download from paddle.utils import download
from pathos.multiprocessing import ProcessPool
from .log import logger from .log import logger
@ -32,27 +31,18 @@ def decompress(file: str):
download._decompress(file) download._decompress(file)
def download_and_decompress(archives: List[Dict[str, str]], def download_and_decompress(archives: List[Dict[str, str]], path: str):
path: str,
n_workers: int=0):
""" """
Download archieves and decompress to specific path. Download archieves and decompress to specific path.
""" """
if not os.path.isdir(path): if not os.path.isdir(path):
os.makedirs(path) os.makedirs(path)
if n_workers <= 0:
for archive in archives: for archive in archives:
assert 'url' in archive and 'md5' in archive, \ assert 'url' in archive and 'md5' in archive, \
'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archieve.keys())}' 'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archieve.keys())}'
download.get_path_from_url(archive['url'], path, archive['md5']) download.get_path_from_url(archive['url'], path, archive['md5'])
else:
pool = ProcessPool(nodes=n_workers)
pool.imap(download.get_path_from_url, [_['url'] for _ in archives],
[path] * len(archives), [_['md5'] for _ in archives])
pool.close()
pool.join()
def load_state_dict_from_url(url: str, path: str, md5: str=None): def load_state_dict_from_url(url: str, path: str, md5: str=None):

@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
''' '''
This module is used to store environmental variables in PaddleSpeech. This module is used to store environmental variables in PaddleAudio.
PPSPEECH_HOME --> the root directory for storing PaddleSpeech related data. Default to ~/.paddlespeech. Users can change the PPAUDIO_HOME --> the root directory for storing PaddleAudio related data. Default to ~/.paddleaudio. Users can change the
default value through the PPSPEECH_HOME environment variable. default value through the PPAUDIO_HOME environment variable.
MODEL_HOME --> Store model files. MODEL_HOME --> Store model files.
DATA_HOME --> Store automatically downloaded datasets. DATA_HOME --> Store automatically downloaded datasets.
''' '''
@ -25,29 +25,29 @@ def _get_user_home():
return os.path.expanduser('~') return os.path.expanduser('~')
def _get_package_home(): def _get_ppaudio_home():
if 'PPSPEECH_HOME' in os.environ: if 'PPAUDIO_HOME' in os.environ:
home_path = os.environ['PPSPEECH_HOME'] home_path = os.environ['PPAUDIO_HOME']
if os.path.exists(home_path): if os.path.exists(home_path):
if os.path.isdir(home_path): if os.path.isdir(home_path):
return home_path return home_path
else: else:
raise RuntimeError( raise RuntimeError(
'The environment variable PPSPEECH_HOME {} is not a directory.'. 'The environment variable PPAUDIO_HOME {} is not a directory.'.
format(home_path)) format(home_path))
else: else:
return home_path return home_path
return os.path.join(_get_user_home(), '.paddlespeech') return os.path.join(_get_user_home(), '.paddleaudio')
def _get_sub_home(directory): def _get_sub_home(directory):
home = os.path.join(_get_package_home(), directory) home = os.path.join(_get_ppaudio_home(), directory)
if not os.path.exists(home): if not os.path.exists(home):
os.makedirs(home) os.makedirs(home)
return home return home
USER_HOME = _get_user_home() USER_HOME = _get_user_home()
PPSPEECH_HOME = _get_package_home() PPAUDIO_HOME = _get_ppaudio_home()
MODEL_HOME = _get_sub_home('pretrained_models') MODEL_HOME = _get_sub_home('models')
DATA_HOME = _get_sub_home('datasets') DATA_HOME = _get_sub_home('datasets')

@ -55,13 +55,13 @@ log_config = {
class Logger(object): class Logger(object):
''' '''
Deafult logger in PaddleSpeech Deafult logger in PaddleAudio
Args: Args:
name(str) : Logger name, default is 'PaddleSpeech' name(str) : Logger name, default is 'PaddleAudio'
''' '''
def __init__(self, name: str=None): def __init__(self, name: str=None):
name = 'PaddleSpeech' if not name else name name = 'PaddleAudio' if not name else name
self.logger = logging.getLogger(name) self.logger = logging.getLogger(name)
for key, conf in log_config.items(): for key, conf in log_config.items():

@ -11,5 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .backends import *
from .features import *

@ -15,10 +15,10 @@ import argparse
import os import os
import paddle import paddle
from model import SoundClassifier
from paddlespeech.cls.datasets import ESC50 from .model import SoundClassifier
from paddlespeech.cls.models.panns import cnn14 from .panns import cnn14
from paddleaudio.datasets import ESC50
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)

@ -16,8 +16,8 @@ import os
import paddle.nn as nn import paddle.nn as nn
import paddle.nn.functional as F import paddle.nn.functional as F
from ..utils.download import load_state_dict_from_url from paddleaudio.utils.download import load_state_dict_from_url
from ..utils.env import MODEL_HOME from paddleaudio.utils.env import MODEL_HOME
__all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6'] __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6']

@ -17,12 +17,12 @@ import numpy as np
import paddle import paddle
import paddle.nn.functional as F import paddle.nn.functional as F
from model import SoundClassifier from model import SoundClassifier
from panns import cnn14
from paddlespeech.cls.backends import load as load_audio from paddleaudio.backends import load as load_audio
from paddlespeech.cls.datasets import ESC50 from paddleaudio.datasets import ESC50
from paddlespeech.cls.features import LogMelSpectrogram from paddleaudio.features import LogMelSpectrogram
from paddlespeech.cls.features import melspectrogram from paddleaudio.features import melspectrogram
from paddlespeech.cls.models.panns import cnn14
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)

@ -16,12 +16,12 @@ import os
import paddle import paddle
from model import SoundClassifier from model import SoundClassifier
from panns import cnn14
from paddlespeech.cls.datasets import ESC50 from paddleaudio.datasets import ESC50
from paddlespeech.cls.features import LogMelSpectrogram from paddleaudio.features import LogMelSpectrogram
from paddlespeech.cls.models.panns import cnn14 from paddleaudio.utils import logger
from paddlespeech.cls.utils import logger from paddleaudio.utils import Timer
from paddlespeech.cls.utils import Timer
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)

@ -0,0 +1,13 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@ -1,4 +1,3 @@
colorlog
ConfigArgParse ConfigArgParse
coverage coverage
distro distro
@ -19,10 +18,10 @@ matplotlib
nara_wpe nara_wpe
nltk nltk
numba numba
paddleaudio
paddlespeech_ctcdecoders paddlespeech_ctcdecoders
paddlespeech_feat paddlespeech_feat
pandas pandas
pathos
phkit phkit
Pillow Pillow
praatio~=4.1 praatio~=4.1

@ -173,7 +173,7 @@ setup_info = dict(
# Package info # Package info
packages=find_packages(exclude=('utils', 'tests', 'tests.*', 'examples*', packages=find_packages(exclude=('utils', 'tests', 'tests.*', 'examples*',
'third_party*', 'tools*')), 'paddleaudio*', 'third_party*', 'tools*')),
zip_safe=True, zip_safe=True,
classifiers=[ classifiers=[
'Development Status :: 3 - Alpha', 'Development Status :: 3 - Alpha',

@ -0,0 +1,41 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import setuptools

# Set the version here.
version = '0.1.0a'

# Packaging metadata for the paddleaudio distribution. Only packages whose
# names match 'paddleaudio*' are collected.
setuptools.setup(
    name="paddleaudio",
    version=version,
    author="",
    author_email="",
    description="PaddleAudio, in development",
    long_description="",
    long_description_content_type="text/markdown",
    url="",
    packages=setuptools.find_packages(include=['paddleaudio*']),
    classifiers=[
        "Programming Language :: Python :: 3",
        # Must agree with the Apache License 2.0 header at the top of this
        # file; the previous MIT classifier contradicted it.
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.6',
    install_requires=[
        'numpy >= 1.15.0',
        'scipy >= 1.0.0',
        'resampy >= 0.2.2',
        'soundfile >= 0.9.0',
        'colorlog',
    ],
)
Loading…
Cancel
Save