diff --git a/.gitignore b/.gitignore index cd2360e15..cfdf0275e 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,10 @@ *.npz *.done *.whl +*.egg-info +build + +docs/build/ tools/venv tools/kenlm diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 218470ec8..000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -myst_parser -numpydoc -recommonmark>=0.5.0 -sphinx -sphinx-autobuild -sphinx-markdown-tables -sphinx_rtd_theme diff --git a/parakeet/__init__.py b/parakeet/__init__.py index 8b99260ed..87528b833 100644 --- a/parakeet/__init__.py +++ b/parakeet/__init__.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.0.0" import logging from . import data from . import datasets diff --git a/requirements.txt b/requirements.txt index 332b52388..2a3f06514 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,6 @@ sacrebleu scipy==1.2.1 sentencepiece snakeviz -SoundFile==0.9.0.post1 sox tensorboardX textgrid @@ -20,3 +19,24 @@ tqdm typeguard visualdl==2.2.0 yacs +numpy==1.20.0 +numba +nltk +inflect +librosa +unidecode +llvmlite +matplotlib +pandas +soundfile~=0.10 +g2p_en +pypinyin +webrtcvad +g2pM +praatio~=4.1 +h5py +timer +pyworld +jieba +phkit +yq diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..625454805 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,9 @@ +[build_ext] +debug=0 + +[metadata] +license_file = LICENSE +description-file = README.md + +[magformat] +formatters=yapf diff --git a/setup.py b/setup.py index 34dcd9195..07b6aac04 100644 --- a/setup.py +++ b/setup.py @@ -14,73 +14,165 @@ import io import os import re +import sys +from pathlib import Path +import contextlib +import inspect from setuptools import find_packages from setuptools import setup +from setuptools import Command +from setuptools.command.develop import develop +from setuptools.command.install import install +import 
import subprocess as sp

# Absolute path of the directory that contains this setup.py.
HERE = Path(os.path.abspath(os.path.dirname(__file__)))


@contextlib.contextmanager
def pushd(new_dir):
    """Temporarily make *new_dir* the current working directory.

    The directory that was current on entry is restored when the ``with``
    block ends, including when its body raises. Each directory is printed
    as it becomes current.

    Args:
        new_dir: Directory to change into (``str`` or ``os.PathLike``).
    """
    old_dir = os.getcwd()
    os.chdir(new_dir)
    print(new_dir)
    try:
        yield
    finally:
        # Restore even on failure; otherwise every later step would run
        # from the wrong directory.
        os.chdir(old_dir)
        print(old_dir)


def read(*names, **kwargs):
    """Return the text of a file located relative to this script.

    Args:
        *names: Path components joined onto the directory of this file.
        **kwargs: Only ``encoding`` is consulted (default ``"utf8"``).

    Returns:
        The decoded file contents as a single string.
    """
    with io.open(os.path.join(os.path.dirname(__file__), *names),
                 encoding=kwargs.get("encoding", "utf8")) as fp:
        return fp.read()


def check_call(cmd: str, shell=False, executable=None):
    """Run *cmd* and raise if it exits with a non-zero status.

    Args:
        cmd: The command line as one string.
        shell: When true, *cmd* is passed unsplit to ``/bin/bash``.
            Otherwise it is tokenised with shell-style quoting rules and
            executed directly, without a shell.
        executable: Program to execute instead of the first token; ignored
            when *shell* is true.

    Raises:
        subprocess.CalledProcessError: The command exited non-zero. The
            failure is also reported on stderr before re-raising.
    """
    # Local import: shlex is not part of the file's top-level imports.
    import shlex

    # With shell=True a list would make bash run only its first element
    # (the remaining items become positional parameters), so the string is
    # passed through untouched in that case.
    args = cmd if shell else shlex.split(cmd)
    try:
        sp.check_call(args,
                      shell=shell,
                      executable="/bin/bash" if shell else executable)
    except sp.CalledProcessError as e:
        # check_call() captures no output, so the exit status is the only
        # diagnostic available here.
        print(
            f"{__file__}:{inspect.currentframe().f_lineno}: CMD: {cmd}, Error:",
            e.returncode,
            file=sys.stderr)
        raise


def _remove(files):
    """Delete every path in *files*.

    Args:
        files: Iterable of ``pathlib.Path`` objects (for example the result
            of ``Path.glob``). Each one is unlinked; a missing file raises
            ``FileNotFoundError``.
    """
    for f in files:
        f.unlink()


def _post_install(install_lib_dir):
    """Install system packages and build the bundled native tools.

    Steps, in order: ``apt-get`` update and install, ``make`` inside
    ``tools/`` (after deleting its ``*.done`` files),
    ``tools/extras/install_autolog.sh``, the ctcdecoder ``setup.sh`` and
    ``third_party/install.sh``. The first failing command aborts the
    sequence with ``subprocess.CalledProcessError``.

    Args:
        install_lib_dir: Passed in by the setuptools command hooks; this
            function does not use it.
    """
    # apt
    # NOTE(review): apt-get normally needs root privileges - confirm that
    # installs are expected to run as root.
    check_call("apt-get update -y")
    check_call("apt-get install -y " + 'vim tig tree sox pkg-config ' +
               'libsndfile1 libflac-dev libogg-dev ' +
               'libvorbis-dev libboost-dev swig python3-dev ')
    print("apt install.")

    # tools/make
    tool_dir = HERE / "tools"
    _remove(tool_dir.glob("*.done"))
    with pushd(tool_dir):
        check_call("make")
    print("tools install.")

    # install autolog
    tools_extrs_dir = HERE / 'tools/extras'
    with pushd(tools_extrs_dir):
        print(os.getcwd())
        check_call("./install_autolog.sh")
    print("autolog install.")

    # ctcdecoder
    ctcdecoder_dir = HERE / 'deepspeech/decoders/ctcdecoder/swig'
    with pushd(ctcdecoder_dir):
        check_call("bash -e setup.sh")
    print("ctcdecoder install.")

    # install third_party
    third_party_dir = HERE / 'third_party'
    with pushd(third_party_dir):
        check_call("bash -e install.sh")
    print("third_party install.")
class DevelopCommand(develop):
    """``develop`` command that runs the post-install steps afterwards."""

    def run(self):
        develop.run(self)
        # Run the post-install steps only after the regular develop step
        # has finished (presumably so the shell-driven installs can already
        # see this package - confirm).
        self.execute(_post_install, (self.install_lib, ),
                     msg="Post Install...")


class InstallCommand(install):
    """``install`` command that runs the post-install steps afterwards."""

    def run(self):
        install.run(self)
        # Run the post-install steps only after the regular install step
        # has finished (presumably so the shell-driven installs can already
        # see this package - confirm).
        self.execute(_post_install, (self.install_lib, ),
                     msg="Post Install...")


# cmd: python setup.py upload
class UploadCommand(Command):
    """Build a source distribution and upload it with ``twine``."""

    description = "Build and publish the package."
    user_options = []

    def initialize_options(self):
        """No options to initialise; required by the ``Command`` API."""
        pass

    def finalize_options(self):
        """No options to finalise; required by the ``Command`` API."""
        pass

    def run(self):
        """Remove ``dist/``, build an sdist, upload it, then exit.

        Raises:
            subprocess.CalledProcessError: If the sdist build or the twine
                upload exits with a non-zero status.
            SystemExit: Always raised at the end, via ``sys.exit()``.
        """
        # Local import: the original code used shutil without importing it,
        # so the cleanup below died with NameError (which the
        # ``except OSError`` does not catch).
        import shutil

        try:
            print("Removing previous dist/ ...")
            shutil.rmtree(str(HERE / "dist"))
        except OSError:
            # Best effort: there is nothing to remove on a first build.
            pass
        print("Building source distribution...")
        # Run from the project root so "setup.py" and "dist/*" refer to the
        # same tree that was just cleaned, whatever the caller's cwd is.
        sp.check_call([sys.executable, "setup.py", "sdist"], cwd=str(HERE))
        print("Uploading package to PyPi...")
        sp.check_call(["twine", "upload", "dist/*"], cwd=str(HERE))
        sys.exit()
'jieba', - "phkit", + description='Speech tools and models based on Paddlepaddle', + long_description=read("README.md"), + long_description_content_type="text/markdown", + keywords=[ + "speech", + "asr", + "tts", + "text frontend", + "MFA", + "paddlepaddle", + "transformer", + "conformer", + "fastspeech", + "vocoder", + "pwgan", + "gan", ], + python_requires='>=3.6', + install_requires=[d.strip() for d in read('requirements.txt').split()], extras_require={ - 'doc': ["sphinx", "sphinx-rtd-theme", "numpydoc"], + 'doc': [ + "sphinx", "sphinx-rtd-theme", "numpydoc", "myst_parser", + "recommonmark>=0.5.0", "sphinx-markdown-tables", "sphinx-autobuild" + ], + }, + cmdclass={ + 'develop': DevelopCommand, + 'install': InstallCommand, + 'upload': UploadCommand, }, # Package info @@ -92,8 +184,12 @@ setup_info = dict( 'Intended Audience :: Science/Research', 'Topic :: Scientific/Engineering :: Artificial Intelligence', 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', - ], ) + 'Programming Language :: Python :: 3.8', + ], +) setup(**setup_info) diff --git a/setup.sh b/setup.sh index 04ee12b49..d3dd8207c 100644 --- a/setup.sh +++ b/setup.sh @@ -26,7 +26,7 @@ source tools/venv/bin/activate # install python dependencies if [ -f "requirements.txt" ]; then - pip3 install -r requirements.txt + pip3 install . fi if [ $? != 0 ]; then error_msg "Install python dependencies failed !!!" diff --git a/tools/Makefile b/tools/Makefile index 5690ea91e..a06074b37 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -6,15 +6,15 @@ CC ?= gcc # used for sph2pipe # CXX = clang++ # Uncomment these lines... # CC = clang # ...to build with Clang. 
-WGET ?= wget --no-check-certificate
+WGET ?= wget --no-check-certificate

 .PHONY: all clean

-all: virtualenv kenlm.done sox.done soxbindings.done mfa.done sclite.done
+all: virtualenv.done kenlm.done sox.done soxbindings.done mfa.done sclite.done

-virtualenv:
+virtualenv.done:
 	test -d venv || virtualenv -p $(PYTHON) venv
-	touch venv/bin/activate
+	touch virtualenv.done

 clean:
 	rm -fr venv
@@ -24,27 +24,27 @@ clean:
 kenlm.done:
 	# Ubuntu 16.04 透過 apt 會安裝 boost 1.58.0
 	# it seems that boost (1.54.0) requires higher version. After I switched to g++-5 it compiles normally.
-	apt install -y build-essential cmake libboost-system-dev libboost-thread-dev libboost-program-options-dev libboost-test-dev libeigen3-dev zlib1g-dev libbz2-dev liblzma-dev
+	apt install -y --allow-unauthenticated build-essential cmake libboost-system-dev libboost-thread-dev libboost-program-options-dev libboost-test-dev libeigen3-dev zlib1g-dev libbz2-dev liblzma-dev
 	apt-get install -y gcc-5 g++-5 && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 50 && update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-5 50
-	test -d kenlm || wget -O - https://kheafield.com/code/kenlm.tar.gz --no-check-certificate | tar xz
+	test -d kenlm || $(WGET) -O - https://kheafield.com/code/kenlm.tar.gz | tar xz
 	rm -rf kenlm/build && mkdir -p kenlm/build && cd kenlm/build && cmake .. && make -j4 && make install
-	source venv/bin/activate; cd kenlm && python setup.py install
+	cd kenlm && python setup.py install
 	touch kenlm.done

 sox.done:
 	apt install -y libvorbis-dev libmp3lame-dev libmad-ocaml-dev
-	test -d sox-14.4.2 || wget https://nchc.dl.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.gz --no-check-certificate
+	test -d sox-14.4.2 || $(WGET) https://nchc.dl.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.gz
 	tar -xvzf sox-14.4.2.tar.gz -C .
 	cd sox-14.4.2 && ./configure --prefix=/usr/ && make -j4 && make install
 	touch sox.done

 soxbindings.done:
 	test -d soxbindings || git clone https://github.com/pseeth/soxbindings.git
-	source venv/bin/activate; cd soxbindings && python setup.py install
+	cd soxbindings && python setup.py install
 	touch soxbindings.done

 mfa.done:
-	test -d montreal-forced-aligner || wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz --no-check-certificate
+	test -d montreal-forced-aligner || $(WGET) https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz
 	tar xvf montreal-forced-aligner_linux.tar.gz
 	touch mfa.done

diff --git a/tools/extras/install_autolog.sh b/tools/extras/install_autolog.sh
new file mode 100755
index 000000000..062b839b6
--- /dev/null
+++ b/tools/extras/install_autolog.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# Install AutoLog from a fresh clone, then remove the checkout.
+echo "Install auto_log into default system path"
+rm -rf AutoLog || true
+test -d AutoLog || git clone https://github.com/LDOUBLEV/AutoLog
+if [ $? != 0 ]; then
+    echo "Download auto_log failed !!!" >&2
+    exit 1
+fi
+
+pushd AutoLog || exit 1
+pip3 install -r requirements.txt
+python3 setup.py install
+popd
+
+rm -rf AutoLog || true