PaddleSpeech/deepspeech/exps/deepspeech2/config.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from yacs.config import CfgNode as CN
from deepspeech.models.deepspeech2 import DeepSpeech2Model

# Root config node; each option group below is attached to it as a child node.
_C = CN()

# Defaults for the `data` section.
_C.data = CN({
    "train_manifest": "",
    "dev_manifest": "",
    "test_manifest": "",
    "vocab_filepath": "",
    "mean_std_filepath": "",
    "augmentation_config": "",
    "max_duration": float('inf'),
    "min_duration": 0.0,
    "stride_ms": 10.0,  # in milliseconds
    "window_ms": 20.0,  # in milliseconds
    "n_fft": None,  # number of FFT points
    "max_freq": None,  # None means samplerate/2
    "specgram_type": 'linear',  # one of 'linear', 'mfcc'
    "target_sample_rate": 16000,  # sample rate
    "use_dB_normalization": True,
    "target_dB": -20,
    "random_seed": 0,
    "keep_transcription_text": False,
    "batch_size": 32,  # batch size
    "num_workers": 0,  # number of data loader workers
    "sortagrad": False,  # when True, sorted in the first epoch
    "shuffle_method": "batch_shuffle",  # one of 'batch_shuffle', 'instance_shuffle'
})

# Defaults for the `model` section.
# NOTE(review): `use_gru` and `share_rnn_weights` both default to True even
# though weight sharing is described as unsupported for GRU -- confirm how the
# model treats this combination.
_C.model = CN({
    "num_conv_layers": 2,  # number of stacked convolution layers
    "num_rnn_layers": 3,  # number of stacked RNN layers
    "rnn_layer_size": 1024,  # RNN layer size (number of RNN cells)
    "use_gru": True,  # True: use GRU; False: use simple RNN
    # Whether the forward and backward directional RNNs share their
    # input-hidden weights. Weight sharing is not supported for GRU.
    "share_rnn_weights": True,
})

# Pass the section to the model class; the return value is not used here.
# NOTE(review): presumably this reconciles the section with the model's own
# defaults -- confirm against DeepSpeech2Model.params.
DeepSpeech2Model.params(_C.model)

# Defaults for the `training` section.
_C.training = CN({
    "lr": 5e-4,  # learning rate
    "lr_decay": 1.0,  # learning rate decay
    "weight_decay": 1e-6,  # weight decay coefficient
    "global_grad_clip": 5.0,  # global norm clip
    "n_epoch": 50,  # number of training epochs
})

# Defaults for the `decoding` section.
_C.decoding = CN({
    "alpha": 2.5,  # LM coefficient for beam search
    "beta": 0.3,  # WC coefficient for beam search
    "cutoff_prob": 1.0,  # cutoff probability for pruning
    "cutoff_top_n": 40,  # cutoff number for pruning
    # File path of the language model.
    "lang_model_path": 'models/lm/common_crawl_00.prune01111.trie.klm',
    # Decoding method, one of 'ctc_beam_search', 'ctc_greedy'.
    "decoding_method": 'ctc_beam_search',
    # Error rate type for evaluation, one of 'wer', 'cer'.
    "error_rate_type": 'wer',
    "num_proc_bsearch": 8,  # number of CPUs for beam search
    "beam_size": 500,  # beam search width
    "batch_size": 128,  # decoding batch size
})


def get_cfg_defaults():
    """Return a clone of the module-level default config ``_C``.

    A clone is handed out rather than ``_C`` itself so that the defaults
    are not altered by the caller (the "local variable" use pattern).
    """
    defaults = _C.clone()
    return defaults
Support paddle 2.x (#538) * 2.x model * model test pass * fix data * fix soundfile with flac support * one thread dataloader test pass * export feasture size add trainer and utils add setup model and dataloader update travis using Bionic dist * add venv; test under venv * fix unittest; train and valid * add train and config * add config and train script * fix ctc cuda memcopy error * fix imports * fix train valid log * fix dataset batch shuffle shift start from 1 fix rank_zero_only decreator error close tensorboard when train over add decoding config and code * test process can run * test with decoding * test and infer with decoding * fix infer * fix ctc loss lr schedule sortagrad logger * aishell egs * refactor train add aishell egs * fix dataset batch shuffle and add batch sampler log print model parameter * fix model and ctc * sequence_mask make all inputs zeros, which cause grad be zero, this is a bug of LessThanOp add grad clip by global norm add model train test notebook * ctc loss remove run prefix using ord value as text id * using unk when training compute_loss need text ids ord id using in test mode, which compute wer/cer * fix tester * add lr_deacy refactor code * fix tools * fix ci add tune fix gru model bugs add dataset and model test * fix decoding * refactor repo fix decoding * fix musan and rir dataset * refactor io, loss, conv, rnn, gradclip, model, utils * fix ci and import * refactor model add export jit model * add deploy bin and test it * rm uselss egs * add layer tools * refactor socket server new model from pretrain * remve useless * fix instability loss and grad nan or inf for librispeech training * fix sampler * fix libri train.sh * fix doc * add license on cpp * fix doc * fix libri script * fix install * clip 5 wer 7.39, clip 400 wer 7.54, 1.8 clip 400 baseline 7.49 4 years ago			`# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

			`from yacs.config import CfgNode as CN`
			`from deepspeech.models.deepspeech2 import DeepSpeech2Model`

			`_C = CN()`
			`_C.data = CN(`
			`dict(`
			`train_manifest="",`
			`dev_manifest="",`
			`test_manifest="",`
			`vocab_filepath="",`
			`mean_std_filepath="",`
			`augmentation_config="",`
			`max_duration=float('inf'),`
			`min_duration=0.0,`
			`stride_ms=10.0, # ms`
			`window_ms=20.0, # ms`
			`n_fft=None, # fft points`
			`max_freq=None, # None for samplerate/2`
			`specgram_type='linear', # 'linear', 'mfcc'`
			`target_sample_rate=16000, # sample rate`
			`use_dB_normalization=True,`
			`target_dB=-20,`
			`random_seed=0,`
			`keep_transcription_text=False,`
			`batch_size=32, # batch size`
			`num_workers=0, # data loader workers`
			`sortagrad=False, # sorted in first epoch when True`
			`shuffle_method="batch_shuffle", # 'batch_shuffle', 'instance_shuffle'`
			`))`

			`_C.model = CN(`
			`dict(`
			`num_conv_layers=2, #Number of stacking convolution layers.`
			`num_rnn_layers=3, #Number of stacking RNN layers.`
			`rnn_layer_size=1024, #RNN layer size (number of RNN cells).`
			`use_gru=True, #Use gru if set True. Use simple rnn if set False.`
			`share_rnn_weights=True #Whether to share input-hidden weights between forward and backward directional RNNs.Notice that for GRU, weight sharing is not supported.`
			`))`

			`DeepSpeech2Model.params(_C.model)`

			`_C.training = CN(`
			`dict(`
			`lr=5e-4, # learning rate`
			`lr_decay=1.0, # learning rate decay`
			`weight_decay=1e-6, # the coeff of weight decay`
			`global_grad_clip=5.0, # the global norm clip`
			`n_epoch=50, # train epochs`
			`))`

			`_C.decoding = CN(`
			`dict(`
			`alpha=2.5, # Coef of LM for beam search.`
			`beta=0.3, # Coef of WC for beam search.`
			`cutoff_prob=1.0, # Cutoff probability for pruning.`
			`cutoff_top_n=40, # Cutoff number for pruning.`
			`lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm', # Filepath for language model.`
			`decoding_method='ctc_beam_search', # Decoding method. Options: ctc_beam_search, ctc_greedy`
			``error_rate_type='wer', # Error rate type for evaluation. Options `wer`, 'cer'``
			`num_proc_bsearch=8, # # of CPUs for beam search.`
			`beam_size=500, # Beam search width.`
			`batch_size=128, # decoding batch size`
			`))`


			`def get_cfg_defaults():`
			`"""Get a yacs CfgNode object with default values for my_project."""`
			`# Return a clone so that the defaults will not be altered`
			`# This is for the "local variable" use pattern`
			`return _C.clone()`