pull/578/head
Hui Zhang 5 years ago
parent dee672a753
commit 22fce19101

@@ -79,9 +79,9 @@ def inference(config, args):
 def start_server(config, args):
     """Start the ASR server"""
+    config.defrost()
     config.data.manfiest = config.data.test_manifest
-    config.data.augmentation_config = io.StringIO(
-        initial_value='{}', newline='')
+    config.data.augmentation_config = ""
     config.data.keep_transcription_text = True
     dataset = ManifestDataset.from_config(config)

@@ -31,9 +31,9 @@ from deepspeech.io.dataset import ManifestDataset
 def start_server(config, args):
     """Start the ASR server"""
+    config.defrost()
     config.data.manfiest = config.data.test_manifest
-    config.data.augmentation_config = io.StringIO(
-        initial_value='{}', newline='')
+    config.data.augmentation_config = ""
     config.data.keep_transcription_text = True
     dataset = ManifestDataset.from_config(config)

@@ -36,10 +36,9 @@ def tune(config, args):
         raise ValueError("num_alphas must be non-negative!")
     if not args.num_betas >= 0:
         raise ValueError("num_betas must be non-negative!")
     config.defrost()
     config.data.manfiest = config.data.dev_manifest
-    config.data.augmentation_config = io.StringIO(
-        initial_value='{}', newline='')
+    config.data.augmentation_config = ""
     config.data.keep_transcription_text = True
     dev_dataset = ManifestDataset.from_config(config)

@@ -13,7 +13,6 @@
 # limitations under the License.
 """Contains DeepSpeech2 model."""
-import io
 import time
 import logging
 import numpy as np
@@ -24,7 +23,7 @@ import paddle
 from paddle import distributed as dist
 from paddle.io import DataLoader
-from deepspeech.training import Trainer
+from deepspeech.training.trainer import Trainer
 from deepspeech.training.gradclip import ClipGradByGlobalNormWithLog
 from deepspeech.utils import mp_tools
@@ -140,15 +139,15 @@ class DeepSpeech2Trainer(Trainer):
         self.logger.info("Setup model/optimizer/lr_scheduler!")
     def setup_dataloader(self):
-        config = self.config
+        config = self.config.clone()
+        config.defrost()
         config.data.keep_transcription_text = False
-        config.data.manfiest = config.data.train_manifest
+        config.data.manifest = config.data.train_manifest
         train_dataset = ManifestDataset.from_config(config)
-        config.data.manfiest = config.data.dev_manifest
-        config.data.augmentation_config = io.StringIO(
-            initial_value='{}', newline='')
+        config.data.manifest = config.data.dev_manifest
+        config.data.augmentation_config = ""
         dev_dataset = ManifestDataset.from_config(config)
         if self.parallel:
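
Note: the setup_dataloader changes above follow a pattern used throughout this PR: the shared config stays frozen, and each stage clones it, defrosts the clone, and edits only the copy. A minimal sketch of that yacs usage (the field names are illustrative, not the full schema):

    from yacs.config import CfgNode

    base = CfgNode(dict(data=CfgNode(dict(train_manifest="data/manifest.train",
                                          manifest=""))))
    base.freeze()                      # shared defaults stay immutable

    cfg = base.clone()                 # per-stage copy; `base` is untouched
    cfg.defrost()                      # allow edits on the copy only
    cfg.data.manifest = cfg.data.train_manifest
    cfg.freeze()
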
@@ -324,13 +323,12 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
         self.logger.info("Setup model!")
     def setup_dataloader(self):
-        config = self.config
+        config = self.config.clone()
+        config.defrost()
         # return raw text
-        config.data.manfiest = config.data.test_manifest
-        config.data.augmentation_config = io.StringIO(
-            initial_value='{}', newline='')
+        config.data.manifest = config.data.test_manifest
+        config.data.augmentation_config = ""
         config.data.keep_transcription_text = True
         test_dataset = ManifestDataset.from_config(config)
         # return text ord id

@@ -14,23 +14,20 @@
 from yacs.config import CfgNode
+from deepspeech.io.dataset import ManifestDataset
 from deepspeech.models.u2 import U2Model
 from deepspeech.exps.u2.model import U2Trainer
 from deepspeech.exps.u2.model import U2Tester

 _C = CfgNode()

-_C.data = CfgNode()
-ManifestDataset.params(_C.data)
+_C.data = ManifestDataset.params()

-_C.model = CfgNode()
-U2Model.params(_C.model)
+_C.model = U2Model.params()

-_C.training = CfgNode()
-U2Trainer.params(_C.training)
+_C.training = U2Trainer.params()

-_C.decoding = CfgNode()
-U2Tester.params(_C.training)
+_C.decoding = U2Tester.params()

 def get_cfg_defaults():
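
Note: the config module now asks each component for its own defaults instead of mutating a caller-supplied CfgNode. A hedged sketch of the `params()` classmethod shape this implies, mirroring the `CfgNode(dict(...))` / `merge_from_other_cfg` pattern visible in the U2BaseModel hunk further down (class name and fields are illustrative):

    from typing import Optional
    from yacs.config import CfgNode

    class DatasetConfigSketch():
        """Illustrative stand-in, not the repo's ManifestDataset."""

        @classmethod
        def params(cls, config: Optional[CfgNode] = None) -> CfgNode:
            default = CfgNode(
                dict(
                    train_manifest="",
                    dev_manifest="",
                    test_manifest="",
                    manifest="", ))
            if config is not None:
                config.merge_from_other_cfg(default)
            return default
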

@@ -18,12 +18,14 @@ import logging
 import numpy as np
 from collections import defaultdict
 from pathlib import Path
+from typing import Optional
+from yacs.config import CfgNode
 import paddle
 from paddle import distributed as dist
 from paddle.io import DataLoader
-from deepspeech.training import Trainer
+from deepspeech.training.trainer import Trainer
 from deepspeech.training.gradclip import ClipGradByGlobalNormWithLog
 from deepspeech.training.scheduler import WarmupLR
@@ -77,7 +79,7 @@ class U2Trainer(Trainer):
         self.model.train()
         start = time.time()
-        loss = self.model(*batch_data)
+        loss, attention_loss, ctc_loss = self.model(*batch_data)
         loss.backward()
         layer_tools.print_grads(self.model, print_func=None)
         if self.iteration % train_conf.accum_grad == 0:
@@ -88,13 +90,15 @@ class U2Trainer(Trainer):
         losses_np = {
             'train_loss': float(loss),
-            'train_loss_div_batchsize':
-            float(loss) / self.config.data.batch_size
+            'train_att_loss': float(attention_loss),
+            'train_ctc_loss': float(ctc_loss),
         }
         msg = "Train: Rank: {}, ".format(dist.get_rank())
         msg += "epoch: {}, ".format(self.epoch)
         msg += "step: {}, ".format(self.iteration)
         msg += "time: {:>.3f}s, ".format(iteration_time)
+        msg += f"batch size: {self.config.data.batch_size}, "
+        msg += f"accum: {train_config.accum_grad}, "
         msg += ', '.join('{}: {:>.6f}'.format(k, v)
                          for k, v in losses_np.items())
         if self.iteration % train_conf.log_interval == 0:
@@ -113,11 +117,11 @@ class U2Trainer(Trainer):
             f"Valid Total Examples: {len(self.valid_loader.dataset)}")
         valid_losses = defaultdict(list)
         for i, batch in enumerate(self.valid_loader):
-            loss = self.model(*batch)
-            valid_losses['val_loss'].append(float(loss))
-            valid_losses['val_loss_div_batchsize'].append(
-                float(loss) / self.config.data.batch_size)
+            total_loss, attention_loss, ctc_loss = self.model(*batch)
+            valid_losses['val_loss'].append(float(total_loss))
+            valid_losses['val_att_loss'].append(float(attention_loss))
+            valid_losses['val_ctc_loss'].append(float(ctc_loss))
         # write visual log
         valid_losses = {k: np.mean(v) for k, v in valid_losses.items()}
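
Note: with this change the model's forward returns the total loss together with its attention and CTC parts, and both loops log all three. For reference, a hedged sketch of the usual hybrid CTC/attention combination (the 0.3 weight matches the `ctc_weight=0.3` default shown in the U2BaseModel hunk below; this is the conventional formula, not a verbatim copy of the model code):

    def combine_hybrid_loss(attention_loss, ctc_loss, ctc_weight=0.3):
        # loss = w * ctc + (1 - w) * attention -- the standard hybrid weighting
        return ctc_weight * ctc_loss + (1.0 - ctc_weight) * attention_loss

    total = combine_hybrid_loss(attention_loss=1.20, ctc_loss=2.50)  # 0.3*2.5 + 0.7*1.2 = 1.59
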
@@ -137,13 +141,14 @@ class U2Trainer(Trainer):
     def setup_dataloader(self):
         config = self.config.clone()
+        config.defrost()
         config.data.keep_transcription_text = False
         # train/valid dataset, return token ids
-        config.data.manfiest = config.data.train_manifest
+        config.data.manifest = config.data.train_manifest
         train_dataset = ManifestDataset.from_config(config)
-        config.data.manfiest = config.data.dev_manifest
+        config.data.manifest = config.data.dev_manifest
         config.data.augmentation_config = ""
         dev_dataset = ManifestDataset.from_config(config)
@@ -181,7 +186,7 @@ class U2Trainer(Trainer):
         # test dataset, return raw text
         config.data.keep_transcription_text = True
         config.data.augmentation_config = ""
-        config.data.manfiest = config.data.test_manifest
+        config.data.manifest = config.data.test_manifest
         test_dataset = ManifestDataset.from_config(config)
         # return text ord id
         self.test_loader = DataLoader(
@@ -193,10 +198,12 @@ class U2Trainer(Trainer):
         self.logger.info("Setup train/valid/test Dataloader!")
     def setup_model(self):
-        config = self.config.clone()
+        config = self.config
         model_conf = config.model
+        model_conf.defrost()
         model_conf.input_dim = self.train_loader.dataset.feature_size
         model_conf.output_dim = self.train_loader.dataset.vocab_size
+        model_conf.freeze()
         model = U2Model.from_config(model_conf)
         if self.parallel:
@@ -206,12 +213,12 @@ class U2Trainer(Trainer):
         train_config = config.training
         optim_type = train_config.optim
-        optim_conf = train_config.train_config
+        optim_conf = train_config.optim_conf
         scheduler_type = train_config.scheduler
         scheduler_conf = train_config.scheduler_conf
         grad_clip = ClipGradByGlobalNormWithLog(train_config.global_grad_clip)
-        weight_decay = paddle.regularizer.L2Decay(train_config.weight_decay)
+        weight_decay = paddle.regularizer.L2Decay(optim_conf.weight_decay)
         if scheduler_type == 'expdecaylr':
             lr_scheduler = paddle.optimizer.lr.ExponentialDecay(
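
Note: this hunk reads the optimizer settings from `train_config.optim_conf` rather than `train_config.train_config`, and takes `weight_decay` from that sub-config. A hedged, self-contained sketch of wiring those fields into a Paddle optimizer (the model, gamma, and clip value are placeholders):

    import paddle
    from yacs.config import CfgNode

    optim_conf = CfgNode(dict(lr=0.002, weight_decay=1e-06))   # mirrors the tiny YAML below
    model = paddle.nn.Linear(16, 16)                           # placeholder model

    lr_scheduler = paddle.optimizer.lr.ExponentialDecay(
        learning_rate=optim_conf.lr, gamma=0.9)                # gamma is illustrative
    optimizer = paddle.optimizer.Adam(
        learning_rate=lr_scheduler,
        parameters=model.parameters(),
        weight_decay=paddle.regularizer.L2Decay(optim_conf.weight_decay),
        grad_clip=paddle.nn.ClipGradByGlobalNorm(5.0))         # stand-in for ClipGradByGlobalNormWithLog
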

@@ -93,5 +93,5 @@ class FeatureNormalizer(object):
             features.append(
                 featurize_func(AudioSegment.from_file(instance["feat"])))
         features = np.hstack(features)  #(D, T)
-        self._mean = np.mean(features, axis=1).reshape([-1, 1])  #(D, 1)
-        self._std = np.std(features, axis=1).reshape([-1, 1])  #(D, 1)
+        self._mean = np.mean(features, axis=1).reshape([1, -1])  #(1, D)
+        self._std = np.std(features, axis=1).reshape([1, -1])  #(1, D)

@@ -235,8 +235,8 @@ def _load_kaldi_cmvn(kaldi_cmvn_file):
 def _load_npz_cmvn(npz_cmvn_file, eps=1e-20):
     npzfile = np.load(npz_cmvn_file)
-    means = npzfile["mean"]  #(D, 1)
-    std = npzfile["std"]  #(D, 1)
+    means = npzfile["mean"]  #(1, D)
+    std = npzfile["std"]  #(1, D)
     std = np.clip(std, eps, None)
     variance = 1.0 / std
     cmvn = np.array([means, variance])
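
Note: both normalizer hunks switch the stored mean/std from (D, 1) column vectors to (1, D) row vectors, so they broadcast directly against frame-major feature matrices. A small numpy sketch of the shapes involved (the apply-time layout is assumed for illustration):

    import numpy as np

    T, D = 100, 80                                   # frames x feature dim (illustrative)
    feats = np.random.randn(D, T).astype("float32")  # stacked as (D, T), as in the hunk above
    mean = np.mean(feats, axis=1).reshape([1, -1])   # (1, D)
    std = np.clip(np.std(feats, axis=1), 1e-20, None).reshape([1, -1])  # (1, D)

    frames = feats.T                                 # (T, D) at apply time
    normalized = (frames - mean) / std               # row vectors broadcast over T frames
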

@@ -16,8 +16,9 @@ import io
 import random
 import tarfile
 import logging
-from collections import namedtuple
+from typing import Optional
 from yacs.config import CfgNode
+from collections import namedtuple
 from paddle.io import Dataset
@@ -42,6 +43,7 @@ class ManifestDataset(Dataset):
             train_manifest="",
             dev_manifest="",
             test_manifest="",
+            manifest="",
             unit_type="char",
             vocab_filepath="",
             spm_model_prefix="",
@@ -60,7 +62,7 @@
             raw_wav=True,  # use raw_wav or kaldi feature
             specgram_type='linear',  # 'linear', 'mfcc', 'fbank'
             feat_dim=0,  # 'mfcc', 'fbank'
-            delat_delta=False,  # 'mfcc', 'fbank'
+            delta_delta=False,  # 'mfcc', 'fbank'
             target_sample_rate=16000,  # target sample rate
             use_dB_normalization=True,
             target_dB=-20,
@@ -86,8 +88,9 @@ class ManifestDataset(Dataset):
         Returns:
             ManifestDataset: dataet object.
         """
-        assert manifest in config.data
-        assert keep_transcription_text in config.data
+        assert 'manifest' in config.data
+        assert config.data.manifest
+        assert 'keep_transcription_text' in config.data
         if isinstance(config.data.augmentation_config, (str, bytes)):
             if config.data.augmentation_config:
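
Note: alongside the call sites above that replace `io.StringIO(initial_value='{}')` with `""`, `from_config` now expects `augmentation_config` to be a plain string: a path to a JSON augmentation spec, or empty for no augmentation. A hedged sketch of that branching (the helper name is made up for illustration):

    import json

    def read_augmentation_spec(augmentation_config):
        """Return the augmentation pipeline as a JSON string; illustrative only."""
        if isinstance(augmentation_config, (str, bytes)):
            if augmentation_config:                  # non-empty -> treat as a file path
                with open(augmentation_config, "rt") as f:
                    return f.read()
            return json.dumps([])                    # empty string -> no augmentation
        return augmentation_config.read()            # old file-like objects still work
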
@@ -119,7 +122,7 @@ class ManifestDataset(Dataset):
             target_sample_rate=config.data.target_sample_rate,
             specgram_type=config.data.specgram_type,
             feat_dim=config.data.feat_dim,
-            delta_delta=config.data.delat_delta,
+            delta_delta=config.data.delta_delta,
             use_dB_normalization=config.data.use_dB_normalization,
             target_dB=config.data.target_dB,
             random_seed=config.data.random_seed,

@@ -75,8 +75,8 @@ class U2BaseModel(nn.Module):
                 dropout_rate=0.1,
                 positional_dropout_rate=0.1,
                 attention_dropout_rate=0.0,
-                input_layer=conv2d,  # encoder input type, you can chose conv2d, conv2d6 and conv2d8
-                normalize_before=true,
+                input_layer='conv2d',  # encoder input type, you can chose conv2d, conv2d6 and conv2d8
+                normalize_before=True,
                 cnn_module_kernel=15,
                 use_cnn_module=True,
                 activation_type='swish',
@@ -98,7 +98,7 @@ class U2BaseModel(nn.Module):
             dict(
                 ctc_weight=0.3,
                 lsm_weight=0.1,  # label smoothing option
-                length_normalized_loss=false, ))
+                length_normalized_loss=False, ))
         if config is not None:
             config.merge_from_other_cfg(default)
@@ -744,15 +744,9 @@ class U2Model(U2BaseModel):
             ValueError: raise when using not support encoder type.
         Returns:
-            int, nn.Layer, nn.Layer, nn.Layer: vocab size, encoder, decoder, ctc
+            nn.Layer: U2Model
         """
-        vocab_size, encoder, decoder, ctc = U2Model._init_from_config(configs)
-        model = cls(vocab_size=vocab_size,
-                    encoder=encoder,
-                    decoder=decoder,
-                    ctc=ctc,
-                    **configs['model_conf'])
+        model = cls(configs)
         return model
     @classmethod

@@ -13,6 +13,8 @@
 # limitations under the License.
 import logging
+from typing import Union
+from typeguard import check_argument_types
 from paddle.optimizer.lr import LRScheduler

@@ -0,0 +1,8 @@
+[
+    {
+        "type": "shift",
+        "params": {"min_shift_ms": -5,
+                   "max_shift_ms": 5},
+        "prob": 1.0
+    }
+]
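
Note: the new file is a JSON list of augmentor specs; this one applies a +/-5 ms time shift to every utterance (`prob: 1.0`). A quick hedged check that it parses as expected (the path here is illustrative; the tiny example config below points `augmentation_config` at `conf/augmentation.config`):

    import json

    with open("conf/augmentation.json") as f:    # path illustrative
        spec = json.load(f)

    for aug in spec:
        print(aug["type"], aug["params"], aug["prob"])
    # shift {'min_shift_ms': -5, 'max_shift_ms': 5} 1.0
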

@@ -5,7 +5,7 @@ data:
   test_manifest: data/manifest.tiny
   vocab_filepath: data/vocab.txt
   unit_type: 'spm'
-  spm_model_prefix: 'bpe_unigram_200'
+  spm_model_prefix: 'data/bpe_unigram_200'
   mean_std_filepath: ""
   augmentation_config: conf/augmentation.config
   batch_size: 4
@@ -119,11 +119,11 @@ training:
   optim: adam
   optim_conf:
     lr: 0.002
-    lr_decay: 1.0
     weight_decay: 1e-06
   scheduler: warmuplr  # pytorch v1.1.0+ required
   scheduler_conf:
     warmup_steps: 25000
+    lr_decay: 1.0
   log_interval: 100
 decoding:
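
Note: the tiny-example config moves `lr_decay` out of `optim_conf` and into `scheduler_conf`, next to `warmup_steps` for the `warmuplr` scheduler. For orientation, a hedged sketch of the Noam-style warm-up curve such a scheduler typically follows (the repo's `WarmupLR` implementation may differ in detail):

    def warmup_lr(step, base_lr=0.002, warmup_steps=25000):
        # ramp up for warmup_steps, then decay proportional to step ** -0.5
        step = max(step, 1)
        return base_lr * warmup_steps ** 0.5 * min(step ** -0.5,
                                                   step * warmup_steps ** -1.5)

    print(warmup_lr(1000), warmup_lr(25000), warmup_lr(100000))  # rises, peaks, then decays
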
