diff --git a/dataset/aidatatang_200zh/aidatatang_200zh.py b/dataset/aidatatang_200zh/aidatatang_200zh.py index b8758c9a..85f478c2 100644 --- a/dataset/aidatatang_200zh/aidatatang_200zh.py +++ b/dataset/aidatatang_200zh/aidatatang_200zh.py @@ -25,6 +25,7 @@ import os from pathlib import Path import soundfile + from utils.utility import download from utils.utility import unpack diff --git a/dataset/aishell/aishell.py b/dataset/aishell/aishell.py index 32dc119d..7431fc08 100644 --- a/dataset/aishell/aishell.py +++ b/dataset/aishell/aishell.py @@ -25,6 +25,7 @@ import os from pathlib import Path import soundfile + from utils.utility import download from utils.utility import unpack diff --git a/dataset/librispeech/librispeech.py b/dataset/librispeech/librispeech.py index 0c779696..69f0db59 100644 --- a/dataset/librispeech/librispeech.py +++ b/dataset/librispeech/librispeech.py @@ -27,6 +27,7 @@ import os from multiprocessing.pool import Pool import soundfile + from utils.utility import download from utils.utility import unpack diff --git a/dataset/mini_librispeech/mini_librispeech.py b/dataset/mini_librispeech/mini_librispeech.py index d96b5d64..730c73a8 100644 --- a/dataset/mini_librispeech/mini_librispeech.py +++ b/dataset/mini_librispeech/mini_librispeech.py @@ -26,6 +26,7 @@ import os from multiprocessing.pool import Pool import soundfile + from utils.utility import download from utils.utility import unpack diff --git a/dataset/musan/musan.py b/dataset/musan/musan.py index dc237c30..2ac701be 100644 --- a/dataset/musan/musan.py +++ b/dataset/musan/musan.py @@ -28,6 +28,7 @@ import json import os import soundfile + from utils.utility import download from utils.utility import unpack diff --git a/dataset/rir_noise/rir_noise.py b/dataset/rir_noise/rir_noise.py index 0e055f17..e7b12289 100644 --- a/dataset/rir_noise/rir_noise.py +++ b/dataset/rir_noise/rir_noise.py @@ -28,6 +28,7 @@ import json import os import soundfile + from utils.utility import download from utils.utility import unzip diff --git a/dataset/thchs30/thchs30.py b/dataset/thchs30/thchs30.py index 879ed58d..cdfc0a75 100644 --- a/dataset/thchs30/thchs30.py +++ b/dataset/thchs30/thchs30.py @@ -26,6 +26,7 @@ from multiprocessing.pool import Pool from pathlib import Path import soundfile + from utils.utility import download from utils.utility import unpack diff --git a/dataset/timit/timit.py b/dataset/timit/timit.py index d03c48a1..c4a9f066 100644 --- a/dataset/timit/timit.py +++ b/dataset/timit/timit.py @@ -27,6 +27,7 @@ import string from pathlib import Path import soundfile + from utils.utility import unzip URL_ROOT = "" diff --git a/dataset/voxforge/voxforge.py b/dataset/voxforge/voxforge.py index c388f449..373791bf 100644 --- a/dataset/voxforge/voxforge.py +++ b/dataset/voxforge/voxforge.py @@ -27,6 +27,7 @@ import shutil import subprocess import soundfile + from utils.utility import download_multi from utils.utility import getfile_insensitive from utils.utility import unpack diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 1b61ccc7..91ef6d16 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -75,4 +75,3 @@ PANN | ESC-50 |[pann-esc50]("./examples/esc50/cls0")|[panns_cnn6.tar.gz](https:/ | [Ds2 Offline Aishell model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_v1.8_to_v2.x.tar.gz) | Aishell Dataset | Char-based | 234 MB | 2 Conv + 3 bidirectional GRU layers | 0.0804 | - | 151 h | | [Ds2 Offline Librispeech model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_v1.8_to_v2.x.tar.gz) | Librispeech Dataset | Word-based | 307 MB | 2 Conv + 3 bidirectional sharing weight RNN layers | - | 0.0685 | 960 h | | [Ds2 Offline Baidu en8k model](https://deepspeech.bj.bcebos.com/eng_models/baidu_en8k_v1.8_to_v2.x.tar.gz) | Baidu Internal English Dataset | Word-based | 273 MB | 2 Conv + 3 bidirectional GRU layers | - | 0.0541 | 8628 h | - diff --git a/paddlespeech/cli/__init__.py b/paddlespeech/cli/__init__.py index c82168ae..80ca7a66 100644 --- a/paddlespeech/cli/__init__.py +++ b/paddlespeech/cli/__init__.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import _locale + from .asr import ASRExecutor from .base_commands import BaseCommand from .base_commands import HelpCommand diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py index ee566ed4..d4e5c22f 100644 --- a/paddlespeech/cls/exps/panns/deploy/predict.py +++ b/paddlespeech/cls/exps/panns/deploy/predict.py @@ -16,10 +16,11 @@ import os import numpy as np from paddle import inference +from scipy.special import softmax + from paddleaudio.backends import load as load_audio from paddleaudio.datasets import ESC50 from paddleaudio.features import melspectrogram -from scipy.special import softmax # yapf: disable parser = argparse.ArgumentParser() diff --git a/paddlespeech/cls/exps/panns/export_model.py b/paddlespeech/cls/exps/panns/export_model.py index 63b22981..c295c6a3 100644 --- a/paddlespeech/cls/exps/panns/export_model.py +++ b/paddlespeech/cls/exps/panns/export_model.py @@ -15,8 +15,8 @@ import argparse import os import paddle -from paddleaudio.datasets import ESC50 +from paddleaudio.datasets import ESC50 from paddlespeech.cls.models import cnn14 from paddlespeech.cls.models import SoundClassifier diff --git a/paddlespeech/cls/exps/panns/predict.py b/paddlespeech/cls/exps/panns/predict.py index 0a1b6ccc..9cfd8b6c 100644 --- a/paddlespeech/cls/exps/panns/predict.py +++ b/paddlespeech/cls/exps/panns/predict.py @@ -16,11 +16,11 @@ import argparse import numpy as np import paddle import paddle.nn.functional as F + from paddleaudio.backends import load as load_audio from paddleaudio.datasets import ESC50 from paddleaudio.features import LogMelSpectrogram from paddleaudio.features import melspectrogram - from paddlespeech.cls.models import cnn14 from paddlespeech.cls.models import SoundClassifier diff --git a/paddlespeech/cls/exps/panns/train.py b/paddlespeech/cls/exps/panns/train.py index 9508a977..12130978 100644 --- a/paddlespeech/cls/exps/panns/train.py +++ b/paddlespeech/cls/exps/panns/train.py @@ -15,11 +15,11 @@ import argparse import os import paddle + from paddleaudio.datasets import ESC50 from paddleaudio.features import LogMelSpectrogram from paddleaudio.utils import logger from paddleaudio.utils import Timer - from paddlespeech.cls.models import cnn14 from paddlespeech.cls.models import SoundClassifier diff --git a/paddlespeech/cls/models/panns/panns.py b/paddlespeech/cls/models/panns/panns.py index b442b2fd..6d2dac56 100644 --- a/paddlespeech/cls/models/panns/panns.py +++ b/paddlespeech/cls/models/panns/panns.py @@ -15,6 +15,7 @@ import os import paddle.nn as nn import paddle.nn.functional as F + from paddleaudio.utils.download import load_state_dict_from_url from paddleaudio.utils.env import MODEL_HOME diff --git a/paddlespeech/s2t/frontend/audio.py b/paddlespeech/s2t/frontend/audio.py index d494cc4f..d0368cc8 100644 --- a/paddlespeech/s2t/frontend/audio.py +++ b/paddlespeech/s2t/frontend/audio.py @@ -356,7 +356,7 @@ class AudioSegment(): # sox, slow try: import soxbindings as sox - except: + except ImportError: try: from paddlespeech.s2t.utils import dynamic_pip_install package = "sox" @@ -364,8 +364,9 @@ class AudioSegment(): package = "soxbindings" dynamic_pip_install.install(package) import soxbindings as sox - except: - raise RuntimeError("Can not install soxbindings on your system." ) + except Exception: + raise RuntimeError( + "Can not install soxbindings on your system.") tfm = sox.Transformer() tfm.set_globals(multithread=False) diff --git a/paddlespeech/s2t/frontend/utility.py b/paddlespeech/s2t/frontend/utility.py index e6c7603f..d35785db 100644 --- a/paddlespeech/s2t/frontend/utility.py +++ b/paddlespeech/s2t/frontend/utility.py @@ -102,9 +102,11 @@ def read_manifest( with jsonlines.open(manifest_path, 'r') as reader: for json_data in reader: feat_len = json_data["input"][0]["shape"][ - 0] if "input" in json_data and "shape" in json_data["input"][0] else 1.0 + 0] if "input" in json_data and "shape" in json_data["input"][ + 0] else 1.0 token_len = json_data["output"][0]["shape"][ - 0] if "output" in json_data and "shape" in json_data["output"][0] else 1.0 + 0] if "output" in json_data and "shape" in json_data["output"][ + 0] else 1.0 conditions = [ feat_len >= min_input_len, feat_len <= max_input_len, diff --git a/paddlespeech/s2t/io/sampler.py b/paddlespeech/s2t/io/sampler.py index 35b57524..ac55af12 100644 --- a/paddlespeech/s2t/io/sampler.py +++ b/paddlespeech/s2t/io/sampler.py @@ -20,13 +20,13 @@ from paddle.io import DistributedBatchSampler from paddlespeech.s2t.utils.log import Log +logger = Log(__name__).getlog() + __all__ = [ "SortagradDistributedBatchSampler", "SortagradBatchSampler", ] -logger = Log(__name__).getlog() - def _batch_shuffle(indices, batch_size, epoch, clipped=False): """Put similarly-sized instances into minibatches for better efficiency diff --git a/paddlespeech/s2t/models/ds2/__init__.py b/paddlespeech/s2t/models/ds2/__init__.py index efa50863..8d5959c8 100644 --- a/paddlespeech/s2t/models/ds2/__init__.py +++ b/paddlespeech/s2t/models/ds2/__init__.py @@ -17,11 +17,11 @@ from paddlespeech.s2t.utils import dynamic_pip_install try: import swig_decoders -except: +except ImportError: try: package_name = 'paddlespeech_ctcdecoders' dynamic_pip_install.install(package_name) - except: + except Exception: raise RuntimeError( "Can not install package paddlespeech_ctcdecoders on your system. \ The DeepSpeech2 model is not supported for your system") diff --git a/paddlespeech/s2t/models/ds2/deepspeech2.py b/paddlespeech/s2t/models/ds2/deepspeech2.py index f0a553ec..0dfaec29 100644 --- a/paddlespeech/s2t/models/ds2/deepspeech2.py +++ b/paddlespeech/s2t/models/ds2/deepspeech2.py @@ -129,7 +129,7 @@ class DeepSpeech2Model(nn.Layer): rnn_layer_size=1024, #RNN layer size (number of RNN cells). use_gru=True, #Use gru if set True. Use simple rnn if set False. share_rnn_weights=True, #Whether to share input-hidden weights between forward and backward directional RNNs.Notice that for GRU, weight sharing is not supported. - ctc_grad_norm_type=None,)) + ctc_grad_norm_type=None, )) if config is not None: config.merge_from_other_cfg(default) return default diff --git a/paddlespeech/s2t/models/ds2_online/__init__.py b/paddlespeech/s2t/models/ds2_online/__init__.py index 65ddd512..2d304237 100644 --- a/paddlespeech/s2t/models/ds2_online/__init__.py +++ b/paddlespeech/s2t/models/ds2_online/__init__.py @@ -17,11 +17,11 @@ from paddlespeech.s2t.utils import dynamic_pip_install try: import swig_decoders -except: +except ImportError: try: package_name = 'paddlespeech_ctcdecoders' dynamic_pip_install.install(package_name) - except: + except Exception: raise RuntimeError( "Can not install package paddlespeech_ctcdecoders on your system. \ The DeepSpeech2 model is not supported for your system") diff --git a/paddlespeech/s2t/modules/ctc.py b/paddlespeech/s2t/modules/ctc.py index 774bcc62..ffc9f038 100644 --- a/paddlespeech/s2t/modules/ctc.py +++ b/paddlespeech/s2t/modules/ctc.py @@ -28,7 +28,7 @@ try: from paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper import ctc_beam_search_decoder_batch # noqa: F401 from paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper import ctc_greedy_decoder # noqa: F401 from paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper import Scorer # noqa: F401 -except: +except ImportError: try: from paddlespeech.s2t.utils import dynamic_pip_install package_name = 'paddlespeech_ctcdecoders' diff --git a/paddlespeech/s2t/training/trainer.py b/paddlespeech/s2t/training/trainer.py index cc8f5031..9bf1ca4d 100644 --- a/paddlespeech/s2t/training/trainer.py +++ b/paddlespeech/s2t/training/trainer.py @@ -221,6 +221,8 @@ class Trainer(): if hasattr(self.train_loader, "batch_sampler"): batch_sampler = self.train_loader.batch_sampler if isinstance(batch_sampler, paddle.io.DistributedBatchSampler): + logger.debug( + f"train_loader.batch_sample set epoch: {self.epoch}") batch_sampler.set_epoch(self.epoch) def before_train(self): diff --git a/paddlespeech/s2t/transform/perturb.py b/paddlespeech/s2t/transform/perturb.py index 90144197..226885f3 100644 --- a/paddlespeech/s2t/transform/perturb.py +++ b/paddlespeech/s2t/transform/perturb.py @@ -147,7 +147,7 @@ class SpeedPerturbationSox(): try: import soxbindings as sox - except: + except ImportError: try: from paddlespeech.s2t.utils import dynamic_pip_install package = "sox" @@ -155,8 +155,10 @@ class SpeedPerturbationSox(): package = "soxbindings" dynamic_pip_install.install(package) import soxbindings as sox - except: - raise RuntimeError("Can not install soxbindings on your system." ) + except Exception: + raise RuntimeError( + "Can not install soxbindings on your system.") + self.sox = sox if utt2ratio is not None: self.utt2ratio = {} @@ -200,7 +202,7 @@ class SpeedPerturbationSox(): else: ratio = self.state.uniform(self.lower, self.upper) - tfm = sox.Transformer() + tfm = self.sox.Transformer() tfm.set_globals(multithread=False) tfm.speed(ratio) y = tfm.build_array(input_array=x, sample_rate_in=self.sr) diff --git a/utils/manifest_key_value.py b/utils/manifest_key_value.py index 3a800903..fb3d3aaa 100755 --- a/utils/manifest_key_value.py +++ b/utils/manifest_key_value.py @@ -5,6 +5,7 @@ import functools from pathlib import Path import jsonlines + from utils.utility import add_arguments from utils.utility import print_arguments