Update download logic and fix README typos.

pull/1095/head
KP 3 years ago
parent 9f3d9aee13
commit 3701fba0be

@ -22,7 +22,7 @@ wget https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav https://paddlespeech
### 3. Usage ### 3. Usage
- Command Line(Recommended) - Command Line(Recommended)
```bash ```bash
paddlespeech cls --input ~/cat.wav --topk 10 paddlespeech cls --input ./cat.wav --topk 10
``` ```
Usage: Usage:
```bash ```bash

@ -22,7 +22,7 @@ wget https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.
### 3. Usage ### 3. Usage
- Command Line(Recommended) - Command Line(Recommended)
```bash ```bash
paddlespeech asr --input ~/zh.wav paddlespeech asr --input ./zh.wav
``` ```
Usage: Usage:
```bash ```bash

@ -22,7 +22,7 @@ wget https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.
### 3. Usage ### 3. Usage
- Command Line(Recommended) - Command Line(Recommended)
```bash ```bash
paddlespeech st --input ~/en.wav paddlespeech st --input ./en.wav
``` ```
Usage: Usage:
```bash ```bash

@ -27,9 +27,9 @@ import yaml
from yacs.config import CfgNode from yacs.config import CfgNode
from ..executor import BaseExecutor from ..executor import BaseExecutor
from ..log import logger
from ..utils import cli_register from ..utils import cli_register
from ..utils import download_and_decompress from ..utils import download_and_decompress
from ..utils import logger
from ..utils import MODEL_HOME from ..utils import MODEL_HOME
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.transform.transformation import Transformation from paddlespeech.s2t.transform.transformation import Transformation

@ -20,14 +20,14 @@ from typing import Union
import numpy as np import numpy as np
import paddle import paddle
import yaml import yaml
from paddleaudio import load
from paddleaudio.features import LogMelSpectrogram
from ..executor import BaseExecutor from ..executor import BaseExecutor
from ..log import logger
from ..utils import cli_register from ..utils import cli_register
from ..utils import download_and_decompress from ..utils import download_and_decompress
from ..utils import logger
from ..utils import MODEL_HOME from ..utils import MODEL_HOME
from paddleaudio import load
from paddleaudio.features import LogMelSpectrogram
from paddlespeech.s2t.utils.dynamic_import import dynamic_import from paddlespeech.s2t.utils.dynamic_import import dynamic_import
__all__ = ['CLSExecutor'] __all__ = ['CLSExecutor']

@ -20,49 +20,21 @@ import os
import os.path as osp import os.path as osp
import shutil import shutil
import subprocess import subprocess
import sys
import tarfile import tarfile
import time import time
import zipfile import zipfile
import requests import requests
from tqdm import tqdm
try: from .log import logger
from tqdm import tqdm
except:
class tqdm(object): __all__ = ['get_path_from_url']
def __init__(self, total=None):
self.total = total
self.n = 0
def update(self, n):
self.n += n
if self.total is None:
sys.stderr.write("\r{0:.1f} bytes".format(self.n))
else:
sys.stderr.write(
"\r{0:.1f}%".format(100 * self.n / float(self.total)))
sys.stderr.flush()
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
sys.stderr.write('\n')
import logging
logger = logging.getLogger(__name__)
__all__ = ['get_weights_path_from_url']
WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights")
DOWNLOAD_RETRY_LIMIT = 3 DOWNLOAD_RETRY_LIMIT = 3
def is_url(path): def _is_url(path):
""" """
Whether path is URL. Whether path is URL.
Args: Args:
@ -71,25 +43,6 @@ def is_url(path):
return path.startswith('http://') or path.startswith('https://') return path.startswith('http://') or path.startswith('https://')
def get_weights_path_from_url(url, md5sum=None):
    """Resolve the local path of a weights file stored under WEIGHTS_HOME.

    The lookup (and, when the file is not present yet, the download) is
    delegated to ``get_path_from_url`` with ``WEIGHTS_HOME`` as root directory.

    Args:
        url (str): download url
        md5sum (str): md5 sum of download package

    Returns:
        str: a local path to save downloaded weights.

    Examples:
        .. code-block:: python

            from paddle.utils.download import get_weights_path_from_url

            resnet18_pretrained_weight_url = 'https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams'
            local_weight_path = get_weights_path_from_url(resnet18_pretrained_weight_url)
    """
    return get_path_from_url(url, WEIGHTS_HOME, md5sum)
def _map_path(url, root_dir): def _map_path(url, root_dir):
# parse path after download under root_dir # parse path after download under root_dir
fname = osp.split(url)[-1] fname = osp.split(url)[-1]
@ -135,7 +88,7 @@ def get_path_from_url(url,
from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.fluid.dygraph.parallel import ParallelEnv
assert is_url(url), "downloading from {} not a url".format(url) assert _is_url(url), "downloading from {} not a url".format(url)
# parse path after download to decompress under root_dir # parse path after download to decompress under root_dir
fullpath = _map_path(url, root_dir) fullpath = _map_path(url, root_dir)
# Mainly used to solve the problem of downloading data from different # Mainly used to solve the problem of downloading data from different

@ -0,0 +1,60 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import logging
__all__ = [
'logger',
]
class Logger(object):
    """Thin wrapper around a named ``logging.Logger`` with extra levels.

    For every entry of the level table a same-named lower-case attribute is
    created on the instance (``debug``, ``info``, ``train``, ``eval``,
    ``warning``, ``error``, ``critical``, ``exception``), so callers can write
    ``logger.info(msg)`` or ``logger.train(msg)``.

    Args:
        name (str, optional): Name of the underlying logger. Falls back to
            ``'PaddleSpeech'`` when empty or ``None``.
    """

    def __init__(self, name: str=None):
        name = 'PaddleSpeech' if not name else name
        self.logger = logging.getLogger(name)

        log_config = {
            'DEBUG': 10,
            'INFO': 20,
            'TRAIN': 21,
            'EVAL': 22,
            'WARNING': 30,
            'ERROR': 40,
            'CRITICAL': 50,
            'EXCEPTION': 100,
        }
        for key, level in log_config.items():
            # Register the (possibly custom) level name so that
            # %(levelname)s renders e.g. "TRAIN" instead of "Level 21".
            logging.addLevelName(level, key)
            if key == 'EXCEPTION':
                # ``exception`` keeps the stdlib behaviour (ERROR level plus
                # traceback) instead of going through __call__.
                self.__dict__[key.lower()] = self.logger.exception
            else:
                self.__dict__[key.lower()] = functools.partial(self.__call__,
                                                               level)

        self.format = logging.Formatter(
            fmt='[%(asctime)-15s] [%(levelname)8s] [%(filename)s] [L%(lineno)d] - %(message)s'
        )

        # ``logging.getLogger(name)`` returns a process-wide singleton, so
        # unconditionally adding a handler here would make every additional
        # ``Logger(name)`` instance duplicate each emitted line. Reuse the
        # handler installed by a previous instance when there is one.
        self.handler = next(
            (h for h in self.logger.handlers
             if getattr(h, '_owned_by_logger_wrapper', False)), None)
        if self.handler is None:
            self.handler = logging.StreamHandler()
            self.handler._owned_by_logger_wrapper = True
            self.logger.addHandler(self.handler)
        self.handler.setFormatter(self.format)

        self.logger.setLevel(logging.DEBUG)
        # Do not forward records to the root logger's handlers.
        self.logger.propagate = False

    def __call__(self, log_level: int, msg: str):
        """Emit ``msg`` on the wrapped logger at the numeric ``log_level``."""
        self.logger.log(log_level, msg)


logger = Logger()

@ -26,9 +26,9 @@ from kaldiio import WriteHelper
from yacs.config import CfgNode from yacs.config import CfgNode
from ..executor import BaseExecutor from ..executor import BaseExecutor
from ..log import logger
from ..utils import cli_register from ..utils import cli_register
from ..utils import download_and_decompress from ..utils import download_and_decompress
from ..utils import logger
from ..utils import MODEL_HOME from ..utils import MODEL_HOME
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.utils.dynamic_import import dynamic_import from paddlespeech.s2t.utils.dynamic_import import dynamic_import

@ -25,9 +25,9 @@ import yaml
from yacs.config import CfgNode from yacs.config import CfgNode
from ..executor import BaseExecutor from ..executor import BaseExecutor
from ..log import logger
from ..utils import cli_register from ..utils import cli_register
from ..utils import download_and_decompress from ..utils import download_and_decompress
from ..utils import logger
from ..utils import MODEL_HOME from ..utils import MODEL_HOME
from paddlespeech.s2t.utils.dynamic_import import dynamic_import from paddlespeech.s2t.utils.dynamic_import import dynamic_import
from paddlespeech.t2s.frontend import English from paddlespeech.t2s.frontend import English
@ -535,7 +535,7 @@ class TTSExecutor(BaseExecutor):
wav = self.voc_inference(mel) wav = self.voc_inference(mel)
self._outputs['wav'] = wav self._outputs['wav'] = wav
def postprocess(self, output: str='output.wav'): def postprocess(self, output: str='output.wav') -> Union[str, os.PathLike]:
""" """
Output postprocess and return results. Output postprocess and return results.
This method gets model output from self._outputs and converts it into human-readable results. This method gets model output from self._outputs and converts it into human-readable results.

@ -11,15 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import functools
import hashlib
import logging
import os import os
import tarfile import tarfile
import zipfile import zipfile
from typing import Any from typing import Any
from typing import Dict from typing import Dict
from typing import List
from paddle.framework import load from paddle.framework import load
@ -31,7 +27,6 @@ __all__ = [
'get_command', 'get_command',
'download_and_decompress', 'download_and_decompress',
'load_state_dict_from_url', 'load_state_dict_from_url',
'logger',
] ]
@ -59,23 +54,6 @@ def get_command(name: str) -> Any:
return com['_entry'] return com['_entry']
def _md5check(filepath: os.PathLike, md5sum: str) -> bool:
    """Check whether the MD5 digest of a file matches the expected value.

    The file is read in 4096-byte blocks so that large archives are never
    loaded into memory at once. Progress and the outcome are reported through
    the module-level ``logger``.

    Args:
        filepath: Path of the file to hash.
        md5sum: Expected hexadecimal MD5 digest.

    Returns:
        True when the computed digest equals ``md5sum``, False otherwise.
    """
    logger.info("File {} md5 checking...".format(filepath))

    digest = hashlib.md5()
    with open(filepath, 'rb') as stream:
        while True:
            block = stream.read(4096)
            if block == b"":
                break
            digest.update(block)
    calc_md5sum = digest.hexdigest()

    if calc_md5sum == md5sum:
        logger.info("File {} md5 check passed.".format(filepath))
        return True

    logger.info("File {} md5 check failed, {}(calc) != "
                "{}(base)".format(filepath, calc_md5sum, md5sum))
    return False
def _get_uncompress_path(filepath: os.PathLike) -> os.PathLike: def _get_uncompress_path(filepath: os.PathLike) -> os.PathLike:
file_dir = os.path.dirname(filepath) file_dir = os.path.dirname(filepath)
if tarfile.is_tarfile(filepath): if tarfile.is_tarfile(filepath):
@ -86,11 +64,12 @@ def _get_uncompress_path(filepath: os.PathLike) -> os.PathLike:
file_list = files.namelist() file_list = files.namelist()
else: else:
return file_dir return file_dir
if _is_a_single_file(file_list):
if download._is_a_single_file(file_list):
rootpath = file_list[0] rootpath = file_list[0]
uncompressed_path = os.path.join(file_dir, rootpath) uncompressed_path = os.path.join(file_dir, rootpath)
elif _is_a_single_dir(file_list): elif download._is_a_single_dir(file_list):
rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[0] rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[-1]
uncompressed_path = os.path.join(file_dir, rootpath) uncompressed_path = os.path.join(file_dir, rootpath)
else: else:
rootpath = os.path.splitext(filepath)[0].split(os.sep)[-1] rootpath = os.path.splitext(filepath)[0].split(os.sep)[-1]
@ -100,28 +79,6 @@ def _get_uncompress_path(filepath: os.PathLike) -> os.PathLike:
return uncompressed_path return uncompressed_path
def _is_a_single_file(file_list: List[os.PathLike]) -> bool:
if len(file_list) == 1 and file_list[0].find(os.sep) < -1:
return True
return False
def _is_a_single_dir(file_list: List[os.PathLike]) -> bool:
new_file_list = []
for file_path in file_list:
if '/' in file_path:
file_path = file_path.replace('/', os.sep)
elif '\\' in file_path:
file_path = file_path.replace('\\', os.sep)
new_file_list.append(file_path)
file_name = new_file_list[0].split(os.sep)[0]
for i in range(1, len(new_file_list)):
if file_name != new_file_list[i].split(os.sep)[0]:
return False
return True
def download_and_decompress(archive: Dict[str, str], path: str) -> os.PathLike: def download_and_decompress(archive: Dict[str, str], path: str) -> os.PathLike:
""" """
Download archives and decompress to specific path. Download archives and decompress to specific path.
@ -133,7 +90,8 @@ def download_and_decompress(archive: Dict[str, str], path: str) -> os.PathLike:
'Dictionary keys of "url" and "md5" are required in the archive, but got: {}'.format(list(archive.keys())) 'Dictionary keys of "url" and "md5" are required in the archive, but got: {}'.format(list(archive.keys()))
filepath = os.path.join(path, os.path.basename(archive['url'])) filepath = os.path.join(path, os.path.basename(archive['url']))
if os.path.isfile(filepath) and _md5check(filepath, archive['md5']): if os.path.isfile(filepath) and download._md5check(filepath,
archive['md5']):
uncompress_path = _get_uncompress_path(filepath) uncompress_path = _get_uncompress_path(filepath)
if not os.path.isdir(uncompress_path): if not os.path.isdir(uncompress_path):
download._decompress(filepath) download._decompress(filepath)
@ -183,44 +141,3 @@ def _get_sub_home(directory):
PPSPEECH_HOME = _get_paddlespcceh_home() PPSPEECH_HOME = _get_paddlespcceh_home()
MODEL_HOME = _get_sub_home('models') MODEL_HOME = _get_sub_home('models')
class Logger(object):
    """Thin wrapper around a named ``logging.Logger`` with extra levels.

    For every entry of the level table a same-named lower-case attribute is
    created on the instance (``debug``, ``info``, ``train``, ``eval``,
    ``warning``, ``error``, ``critical``, ``exception``), so callers can write
    ``logger.info(msg)`` or ``logger.train(msg)``.

    Args:
        name (str, optional): Name of the underlying logger. Falls back to
            ``'PaddleSpeech'`` when empty or ``None``.
    """

    def __init__(self, name: str=None):
        name = 'PaddleSpeech' if not name else name
        self.logger = logging.getLogger(name)

        log_config = {
            'DEBUG': 10,
            'INFO': 20,
            'TRAIN': 21,
            'EVAL': 22,
            'WARNING': 30,
            'ERROR': 40,
            'CRITICAL': 50,
            'EXCEPTION': 100,
        }
        for key, level in log_config.items():
            # Register the (possibly custom) level name so that
            # %(levelname)s renders e.g. "TRAIN" instead of "Level 21".
            logging.addLevelName(level, key)
            if key == 'EXCEPTION':
                # ``exception`` keeps the stdlib behaviour (ERROR level plus
                # traceback) instead of going through __call__.
                self.__dict__[key.lower()] = self.logger.exception
            else:
                self.__dict__[key.lower()] = functools.partial(self.__call__,
                                                               level)

        self.format = logging.Formatter(
            fmt='[%(asctime)-15s] [%(levelname)8s] [%(filename)s] [L%(lineno)d] - %(message)s'
        )

        # ``logging.getLogger(name)`` returns a process-wide singleton, so
        # unconditionally adding a handler here would make every additional
        # ``Logger(name)`` instance duplicate each emitted line. Reuse the
        # handler installed by a previous instance when there is one.
        self.handler = next(
            (h for h in self.logger.handlers
             if getattr(h, '_owned_by_logger_wrapper', False)), None)
        if self.handler is None:
            self.handler = logging.StreamHandler()
            self.handler._owned_by_logger_wrapper = True
            self.logger.addHandler(self.handler)
        self.handler.setFormatter(self.format)

        self.logger.setLevel(logging.DEBUG)
        # Do not forward records to the root logger's handlers.
        self.logger.propagate = False

    def __call__(self, log_level: int, msg: str):
        """Emit ``msg`` on the wrapped logger at the numeric ``log_level``."""
        self.logger.log(log_level, msg)


logger = Logger()

Loading…
Cancel
Save