Merge pull request #741 from PaddlePaddle/utils

duration utils
pull/742/head
Hui Zhang 3 years ago committed by GitHub
commit 254d753b1c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -84,9 +84,8 @@ FILES = glob.glob('kenlm/util/*.cc') \
FILES += glob.glob('openfst-1.6.3/src/lib/*.cc') FILES += glob.glob('openfst-1.6.3/src/lib/*.cc')
FILES = [ FILES = [
fn for fn in FILES fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc')
if not (fn.endswith('main.cc') or fn.endswith('test.cc') or fn.endswith( or fn.endswith('unittest.cc'))
'unittest.cc'))
] ]
LIBS = ['stdc++'] LIBS = ['stdc++']

@ -1,7 +1,17 @@
from .deepspeech2 import DeepSpeech2Model # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .deepspeech2 import DeepSpeech2InferModel from .deepspeech2 import DeepSpeech2InferModel
from .deepspeech2 import DeepSpeech2Model
__all__ = ['DeepSpeech2Model', 'DeepSpeech2InferModel'] __all__ = ['DeepSpeech2Model', 'DeepSpeech2InferModel']

@ -19,15 +19,15 @@ from paddle import nn
from yacs.config import CfgNode from yacs.config import CfgNode
from deepspeech.models.ds2.conv import ConvStack from deepspeech.models.ds2.conv import ConvStack
from deepspeech.modules.ctc import CTCDecoder
from deepspeech.models.ds2.rnn import RNNStack from deepspeech.models.ds2.rnn import RNNStack
from deepspeech.modules.ctc import CTCDecoder
from deepspeech.utils import layer_tools from deepspeech.utils import layer_tools
from deepspeech.utils.checkpoint import Checkpoint from deepspeech.utils.checkpoint import Checkpoint
from deepspeech.utils.log import Log from deepspeech.utils.log import Log
logger = Log(__name__).getlog() logger = Log(__name__).getlog()
__all__ = ['DeepSpeech2Model', 'DeepSpeech2InferMode'] __all__ = ['DeepSpeech2Model', 'DeepSpeech2InferModel']
class CRNNEncoder(nn.Layer): class CRNNEncoder(nn.Layer):
@ -117,7 +117,7 @@ class DeepSpeech2Model(nn.Layer):
:type share_weights: bool :type share_weights: bool
:return: A tuple of an output unnormalized log probability layer ( :return: A tuple of an output unnormalized log probability layer (
before softmax) and a ctc cost layer. before softmax) and a ctc cost layer.
:rtype: tuple of LayerOutput :rtype: tuple of LayerOutput
""" """
@classmethod @classmethod
@ -206,10 +206,10 @@ class DeepSpeech2Model(nn.Layer):
config: yacs.config.CfgNode config: yacs.config.CfgNode
model configs model configs
checkpoint_path: Path or str checkpoint_path: Path or str
the path of pretrained model checkpoint, without extension name the path of pretrained model checkpoint, without extension name
Returns Returns
------- -------
DeepSpeech2Model DeepSpeech2Model

@ -17,4 +17,4 @@
| conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | attention | 16, -1 | 2.23287845 | 0.087982 | | conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | attention | 16, -1 | 2.23287845 | 0.087982 |
| conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | ctc_greedy_search | 16, -1 | 2.23287845 | 0.086962 | | conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | ctc_greedy_search | 16, -1 | 2.23287845 | 0.086962 |
| conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | ctc_prefix_beam_search | 16, -1 | 2.23287845 | 0.086741 | | conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | ctc_prefix_beam_search | 16, -1 | 2.23287845 | 0.086741 |
| conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | attention_rescoring | 16, -1 | 2.23287845 | 0.083495 | | conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | attention_rescoring | 16, -1 | 2.23287845 | 0.083495 |

@ -1,5 +1,12 @@
# LibriSpeech # LibriSpeech
## Data
| Data Subset | Duration in Seconds |
| --- | --- |
| data/manifest.train | 0.83s ~ 29.735s |
| data/manifest.dev | 1.065s ~ 35.155s |
| data/manifest.test-clean | 1.285s ~ 34.955s |
## Conformer ## Conformer
| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER | | Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |

@ -1,10 +1,10 @@
export MAIN_ROOT=${PWD}/../../../ export MAIN_ROOT=${PWD}/../../../
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH} export PATH=${MAIN_ROOT}:${PWD}/utils:${PATH}
export LC_ALL=C export LC_ALL=C
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C # Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8 export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

@ -10,7 +10,7 @@ fi
if [ -e /etc/lsb-release ];then if [ -e /etc/lsb-release ];then
#${SUDO} apt-get update #${SUDO} apt-get update
${SUDO} apt-get install -y vim tig tree sox pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev ${SUDO} apt-get install -y jq vim tig tree sox pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev
if [ $? != 0 ]; then if [ $? != 0 ]; then
error_msg "Please using Ubuntu or install pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev by user." error_msg "Please using Ubuntu or install pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev by user."
exit -1 exit -1
@ -54,10 +54,10 @@ if [ $? != 0 ]; then
fi fi
cd AutoLog cd AutoLog
pip install -r requirements.txt pip install -r requirements.txt
python setup.py install python setup.py install
cd .. cd ..
rm -rf AutoLog rm -rf AutoLog
fi fi
# install decoders # install decoders
python3 -c "import pkg_resources; pkg_resources.require(\"swig_decoders==1.1\")" python3 -c "import pkg_resources; pkg_resources.require(\"swig_decoders==1.1\")"

@ -0,0 +1,10 @@
#!/bin/bash
# Print the distinct `.feat_shape[0]` values found in a manifest file,
# sorted numerically in ascending order (one value per line).
#
# usage: <this script> manifest_file
#
# NOTE(review): presumably the manifest holds one JSON object per line and
# `feat_shape[0]` is the utterance duration -- confirm against the
# manifest writer.

# Exactly one argument (the manifest path) is required. The previous test
# was inverted (`$# == 1`): it rejected the only valid invocation and let
# the no-argument case fall through to jq.
if [ $# -ne 1 ]; then
    echo "usage: ${0} manifest_file"
    exit -1
fi

manifest=$1

# Quote the path so manifests whose names contain spaces or glob
# characters are passed to jq as a single argument.
# `sort -nu` orders the values numerically and drops duplicates.
jq -S '.feat_shape[0]' "${manifest}" | sort -nu
Loading…
Cancel
Save