From 571b13c53f8c50a522cbf72b122d2466041af020 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Wed, 4 Aug 2021 06:56:21 +0000 Subject: [PATCH 1/2] add dur utils --- examples/librispeech/s1/README.md | 7 +++++++ examples/librispeech/s1/path.sh | 4 ++-- examples/librispeech/s1/utils | 1 + setup.sh | 6 +++--- utils/duration_from_maniefst.sh | 10 ++++++++++ 5 files changed, 23 insertions(+), 5 deletions(-) create mode 120000 examples/librispeech/s1/utils create mode 100644 utils/duration_from_maniefst.sh diff --git a/examples/librispeech/s1/README.md b/examples/librispeech/s1/README.md index 080b340e..7f89b808 100644 --- a/examples/librispeech/s1/README.md +++ b/examples/librispeech/s1/README.md @@ -1,5 +1,12 @@ # LibriSpeech +## Data + +| Data Subset | Duration in Seconds | +| data/manifest.train | 0.83s ~ 29.735s | +| data/manifest.dev | 1.065 ~ 35.155s | +| data/manifest.test-clean | 1.285s ~ 34.955s | + ## Conformer | Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER | diff --git a/examples/librispeech/s1/path.sh b/examples/librispeech/s1/path.sh index 30adb6ca..22fb1255 100644 --- a/examples/librispeech/s1/path.sh +++ b/examples/librispeech/s1/path.sh @@ -1,10 +1,10 @@ export MAIN_ROOT=${PWD}/../../../ -export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH} +export PATH=${MAIN_ROOT}:${PWD}/utils:${PATH} export LC_ALL=C # Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 +export PYTHONIOENCODING=UTF-8 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/ diff --git a/examples/librispeech/s1/utils b/examples/librispeech/s1/utils new file mode 120000 index 00000000..973afe67 --- /dev/null +++ b/examples/librispeech/s1/utils @@ -0,0 +1 @@ +../../../utils \ No newline at end of file diff --git a/setup.sh b/setup.sh index 384d62d2..b340d47b 100644 --- a/setup.sh +++ b/setup.sh @@ -10,7 +10,7 @@ fi if [ -e /etc/lsb-release ];then #${SUDO} apt-get update - 
${SUDO} apt-get install -y vim tig tree sox pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev + ${SUDO} apt-get install -y jq vim tig tree sox pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev if [ $? != 0 ]; then error_msg "Please using Ubuntu or install pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev by user." exit -1 @@ -54,10 +54,10 @@ if [ $? != 0 ]; then fi cd AutoLog pip install -r requirements.txt - python setup.py install + python setup.py install cd .. rm -rf AutoLog -fi +fi # install decoders python3 -c "import pkg_resources; pkg_resources.require(\"swig_decoders==1.1\")" diff --git a/utils/duration_from_maniefst.sh b/utils/duration_from_maniefst.sh new file mode 100644 index 00000000..fae579c7 --- /dev/null +++ b/utils/duration_from_maniefst.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +if [ $# != 1 ];then + echo "usage: ${0} manifest_file" + exit -1 +fi + +manifest=$1 + +jq -S '.feat_shape[0]' "${manifest}" | sort -nu From ccdfd5b342696744a5b74c0c69a3a2ab257757a7 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Wed, 4 Aug 2021 06:58:28 +0000 Subject: [PATCH 2/2] format --- deepspeech/decoders/swig/setup.py | 5 ++--- deepspeech/models/ds2/__init__.py | 18 ++++++++++++++---- deepspeech/models/ds2/deepspeech2.py | 10 +++++----- examples/callcenter/s1/README.md | 2 +- examples/librispeech/s1/README.md | 6 +++--- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/deepspeech/decoders/swig/setup.py b/deepspeech/decoders/swig/setup.py index 3da5ce8b..86af475a 100644 --- a/deepspeech/decoders/swig/setup.py +++ b/deepspeech/decoders/swig/setup.py @@ -84,9 +84,8 @@ FILES = glob.glob('kenlm/util/*.cc') \ FILES += glob.glob('openfst-1.6.3/src/lib/*.cc') FILES = [ - fn for fn in FILES - if not (fn.endswith('main.cc') or fn.endswith('test.cc') or fn.endswith( - 'unittest.cc')) + fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc') + or 
fn.endswith('unittest.cc')) ] LIBS = ['stdc++'] diff --git a/deepspeech/models/ds2/__init__.py b/deepspeech/models/ds2/__init__.py index 299f901c..39bea5bf 100644 --- a/deepspeech/models/ds2/__init__.py +++ b/deepspeech/models/ds2/__init__.py @@ -1,7 +1,17 @@ -from .deepspeech2 import DeepSpeech2Model +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from .deepspeech2 import DeepSpeech2InferModel +from .deepspeech2 import DeepSpeech2Model __all__ = ['DeepSpeech2Model', 'DeepSpeech2InferModel'] - - - diff --git a/deepspeech/models/ds2/deepspeech2.py b/deepspeech/models/ds2/deepspeech2.py index 0bd5fb95..8d737e80 100644 --- a/deepspeech/models/ds2/deepspeech2.py +++ b/deepspeech/models/ds2/deepspeech2.py @@ -19,15 +19,15 @@ from paddle import nn from yacs.config import CfgNode from deepspeech.models.ds2.conv import ConvStack -from deepspeech.modules.ctc import CTCDecoder from deepspeech.models.ds2.rnn import RNNStack +from deepspeech.modules.ctc import CTCDecoder from deepspeech.utils import layer_tools from deepspeech.utils.checkpoint import Checkpoint from deepspeech.utils.log import Log logger = Log(__name__).getlog() -__all__ = ['DeepSpeech2Model', 'DeepSpeech2InferMode'] +__all__ = ['DeepSpeech2Model', 'DeepSpeech2InferModel'] class CRNNEncoder(nn.Layer): @@ -117,7 +117,7 @@ class DeepSpeech2Model(nn.Layer): :type share_weights: bool :return: A tuple of an output unnormalized log probability layer ( 
before softmax) and a ctc cost layer. - :rtype: tuple of LayerOutput + :rtype: tuple of LayerOutput """ @classmethod @@ -206,10 +206,10 @@ class DeepSpeech2Model(nn.Layer): config: yacs.config.CfgNode model configs - + checkpoint_path: Path or str the path of pretrained model checkpoint, without extension name - + Returns ------- DeepSpeech2Model diff --git a/examples/callcenter/s1/README.md b/examples/callcenter/s1/README.md index a83a516b..b9fa1472 100644 --- a/examples/callcenter/s1/README.md +++ b/examples/callcenter/s1/README.md @@ -17,4 +17,4 @@ | conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | attention | 16, -1 | 2.23287845 | 0.087982 | | conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | ctc_greedy_search | 16, -1 | 2.23287845 | 0.086962 | | conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | ctc_prefix_beam_search | 16, -1 | 2.23287845 | 0.086741 | -| conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | attention_rescoring | 16, -1 | 2.23287845 | 0.083495 | +| conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | attention_rescoring | 16, -1 | 2.23287845 | 0.083495 | diff --git a/examples/librispeech/s1/README.md b/examples/librispeech/s1/README.md index 7f89b808..79b5b80e 100644 --- a/examples/librispeech/s1/README.md +++ b/examples/librispeech/s1/README.md @@ -2,10 +2,10 @@ ## Data -| Data Subset | Duration in Seconds | -| data/manifest.train | 0.83s ~ 29.735s | +| Data Subset | Duration in Seconds | +| data/manifest.train | 0.83s ~ 29.735s | | data/manifest.dev | 1.065 ~ 35.155s | -| data/manifest.test-clean | 1.285s ~ 34.955s | +| data/manifest.test-clean | 1.285s ~ 34.955s | ## Conformer