parent
91bc5959a9
commit
3843372958
@ -0,0 +1,219 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Contains U2 model."""
|
||||
import paddle
|
||||
from paddle import distributed as dist
|
||||
from paddle.io import DataLoader
|
||||
|
||||
from deepspeech.io.collator import SpeechCollator
|
||||
from deepspeech.io.dataset import ManifestDataset
|
||||
from deepspeech.io.sampler import SortagradBatchSampler
|
||||
from deepspeech.io.sampler import SortagradDistributedBatchSampler
|
||||
from deepspeech.models.u2 import U2Evaluator
|
||||
from deepspeech.models.u2 import U2Model
|
||||
from deepspeech.models.u2 import U2Updater
|
||||
from deepspeech.training.extensions.snapshot import Snapshot
|
||||
from deepspeech.training.extensions.visualizer import VisualDL
|
||||
from deepspeech.training.optimizer import OptimizerFactory
|
||||
from deepspeech.training.scheduler import LRSchedulerFactory
|
||||
from deepspeech.training.timer import Timer
|
||||
from deepspeech.training.trainer import Trainer
|
||||
from deepspeech.training.updaters.trainer import Trainer as NewTrainer
|
||||
from deepspeech.utils import layer_tools
|
||||
from deepspeech.utils.log import Log
|
||||
|
||||
# Module-level logger, obtained from the project's Log helper under this
# module's name.
logger = Log(__name__).getlog()
|
||||
|
||||
|
||||
class U2Trainer(Trainer):
    """Trainer for the U2 model driven by an updater/extension loop.

    Builds the data loaders, the model together with its optimizer and
    learning-rate scheduler, and finally an updater-based trainer
    (``NewTrainer``) extended with evaluation, VisualDL logging and k-best
    snapshots.
    """

    def __init__(self, config, args):
        super().__init__(config, args)

    def setup_dataloader(self):
        """Create the train, valid, test and align data loaders.

        All per-split overrides are applied to a defrosted clone of
        ``self.config`` rather than to ``self.config`` itself.

        Sets ``self.train_loader``, ``self.valid_loader``,
        ``self.test_loader`` and ``self.align_loader``.
        """
        config = self.config.clone()
        config.defrost()
        config.collator.keep_transcription_text = False

        # train/valid dataset, return token ids
        config.data.manifest = config.data.train_manifest
        train_dataset = ManifestDataset.from_config(config)

        config.data.manifest = config.data.dev_manifest
        dev_dataset = ManifestDataset.from_config(config)

        collate_fn_train = SpeechCollator.from_config(config)

        # The dev collator is built after the augmentation config is cleared.
        config.collator.augmentation_config = ""
        collate_fn_dev = SpeechCollator.from_config(config)

        if self.parallel:
            batch_sampler = SortagradDistributedBatchSampler(
                train_dataset,
                batch_size=config.collator.batch_size,
                num_replicas=None,
                rank=None,
                shuffle=True,
                drop_last=True,
                sortagrad=config.collator.sortagrad,
                shuffle_method=config.collator.shuffle_method)
        else:
            batch_sampler = SortagradBatchSampler(
                train_dataset,
                shuffle=True,
                batch_size=config.collator.batch_size,
                drop_last=True,
                sortagrad=config.collator.sortagrad,
                shuffle_method=config.collator.shuffle_method)
        self.train_loader = DataLoader(
            train_dataset,
            batch_sampler=batch_sampler,
            collate_fn=collate_fn_train,
            num_workers=config.collator.num_workers, )
        self.valid_loader = DataLoader(
            dev_dataset,
            batch_size=config.collator.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=collate_fn_dev)

        # test dataset, return raw text
        config.data.manifest = config.data.test_manifest
        # Every length/ratio bound is set to its extreme (0 or inf) for the
        # test split; presumably this disables example filtering in
        # ManifestDataset -- TODO confirm.
        config.data.min_input_len = 0.0  # second
        config.data.max_input_len = float('inf')  # second
        config.data.min_output_len = 0.0  # tokens
        config.data.max_output_len = float('inf')  # tokens
        config.data.min_output_input_ratio = 0.00
        config.data.max_output_input_ratio = float('inf')

        test_dataset = ManifestDataset.from_config(config)
        # test loader: keep the transcription text
        config.collator.keep_transcription_text = True
        config.collator.augmentation_config = ""
        self.test_loader = DataLoader(
            test_dataset,
            batch_size=config.decoding.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=SpeechCollator.from_config(config))
        # align loader: same test dataset, transcription text not kept
        config.collator.keep_transcription_text = False
        self.align_loader = DataLoader(
            test_dataset,
            batch_size=config.decoding.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=SpeechCollator.from_config(config))
        logger.info("Setup train/valid/test/align Dataloader!")

    def setup_model(self):
        """Build the U2 model, its learning-rate scheduler and its optimizer.

        Reads ``self.train_loader`` (created by ``setup_dataloader``): the
        model's input and output dimensions are taken from its collate
        function.

        Sets ``self.model``, ``self.optimizer`` and ``self.lr_scheduler``.
        """
        config = self.config
        model_conf = config.model
        model_conf.defrost()
        model_conf.input_dim = self.train_loader.collate_fn.feature_size
        model_conf.output_dim = self.train_loader.collate_fn.vocab_size
        model_conf.freeze()
        model = U2Model.from_config(model_conf)

        if self.parallel:
            model = paddle.DataParallel(model)

        model.train()
        logger.info(f"{model}")
        layer_tools.print_params(model, logger.info)

        train_config = config.training
        optim_type = train_config.optim
        optim_conf = train_config.optim_conf
        scheduler_type = train_config.scheduler
        scheduler_conf = train_config.scheduler_conf

        scheduler_args = {
            "learning_rate": optim_conf.lr,
            "verbose": False,
            "warmup_steps": scheduler_conf.warmup_steps,
            "gamma": scheduler_conf.lr_decay,
            "d_model": model_conf.encoder_conf.output_size,
        }
        lr_scheduler = LRSchedulerFactory.from_args(scheduler_type,
                                                    scheduler_args)

        def optimizer_args(
                config,
                parameters,
                lr_scheduler=None, ):
            """Collect the keyword arguments handed to ``OptimizerFactory``."""
            train_config = config.training
            optim_type = train_config.optim
            optim_conf = train_config.optim_conf
            return {
                "grad_clip": train_config.global_grad_clip,
                "weight_decay": optim_conf.weight_decay,
                "learning_rate": lr_scheduler
                if lr_scheduler else optim_conf.lr,
                "parameters": parameters,
                "epsilon": 1e-9 if optim_type == 'noam' else None,
                "beta1": 0.9 if optim_type == 'noam' else None,
                # Key was previously misspelt as "beat2", so the value never
                # matched the optimizer's `beta2` argument.
                "beta2": 0.98 if optim_type == 'noam' else None,
            }

        optim_args = optimizer_args(config, model.parameters(), lr_scheduler)
        optimizer = OptimizerFactory.from_args(optim_type, optim_args)

        self.model = model
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        logger.info("Setup model/optimizer/lr_scheduler!")

    def setup_updater(self):
        """Assemble the updater-based trainer and register its extensions.

        Reads ``self.model``, ``self.optimizer``, ``self.lr_scheduler``,
        ``self.train_loader`` and ``self.valid_loader``; stores the result in
        ``self.trainer``.
        """
        output_dir = self.output_dir
        config = self.config.training

        updater = U2Updater(
            model=self.model,
            optimizer=self.optimizer,
            scheduler=self.lr_scheduler,
            dataloader=self.train_loader,
            output_dir=output_dir,
            accum_grad=config.accum_grad)

        trainer = NewTrainer(updater, (config.n_epoch, 'epoch'), output_dir)

        evaluator = U2Evaluator(self.model, self.valid_loader)

        trainer.extend(evaluator, trigger=(1, "epoch"))

        # Logging and snapshot extensions are registered on rank 0 only.
        if dist.get_rank() == 0:
            trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
            num_snapshots = config.checkpoint.kbest_n
            trainer.extend(
                Snapshot(
                    mode='kbest',
                    max_size=num_snapshots,
                    indicator='VALID/LOSS',
                    less_better=True),
                trigger=(1, 'epoch'))
        # print(trainer.extensions)
        # trainer.run()
        self.trainer = trainer

    def run(self):
        """The routine of the experiment after setup. This method is intended
        to be used by the user.
        """
        self.setup_updater()
        with Timer("Training Done: {}"):
            self.trainer.run()
|
@ -0,0 +1,19 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from .u2 import U2InferModel
|
||||
from .u2 import U2Model
|
||||
from .updater import U2Evaluator
|
||||
from .updater import U2Updater
|
||||
|
||||
# Public names of this package: the classes imported above from `.u2` and
# `.updater`.
__all__ = ["U2Model", "U2InferModel", "U2Evaluator", "U2Updater"]
|
@ -0,0 +1,149 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from contextlib import nullcontext
|
||||
|
||||
import paddle
|
||||
from paddle import distributed as dist
|
||||
|
||||
from deepspeech.training.extensions.evaluator import StandardEvaluator
|
||||
from deepspeech.training.reporter import report
|
||||
from deepspeech.training.timer import Timer
|
||||
from deepspeech.training.updaters.standard_updater import StandardUpdater
|
||||
from deepspeech.utils import layer_tools
|
||||
from deepspeech.utils.log import Log
|
||||
|
||||
# Module-level logger, obtained from the project's Log helper under this
# module's name.
logger = Log(__name__).getlog()
|
||||
|
||||
|
||||
class U2Evaluator(StandardEvaluator):
    """Evaluator that scores batches with the U2 model and reports losses.

    Keeps running totals (``total_loss`` weighted by utterance count, and
    ``num_seen_utts``) across calls to ``evaluate_core``.
    """

    def __init__(self, model, dataloader):
        super().__init__(model, dataloader)
        self.msg = ""
        self.num_seen_utts = 0
        self.total_loss = 0.0

    def evaluate_core(self, batch):
        """Evaluate one batch and report its losses under ``eval/``.

        Args:
            batch: indexable batch; element 0 is skipped and the remaining
                elements are passed positionally to the model.

        Returns:
            tuple: ``(self.total_loss, self.num_seen_utts)`` after this batch.
        """
        self.msg = f"Valid: Rank: {dist.get_rank()}, "
        losses_dict = {}

        model_inputs = batch[1:]
        loss, attention_loss, ctc_loss = self.model(*model_inputs)

        # Batches with a non-finite loss contribute nothing to the totals
        # and report nothing.
        if paddle.isfinite(loss):
            utt_count = batch[1].shape[0]
            self.num_seen_utts += utt_count
            self.total_loss += float(loss) * utt_count

            losses_dict['loss'] = float(loss)
            if attention_loss:
                losses_dict['att_loss'] = float(attention_loss)
            if ctc_loss:
                losses_dict['ctc_loss'] = float(ctc_loss)

            for name, value in losses_dict.items():
                report(f"eval/{name}", value)

        summary = ', '.join(f'{name}: {value:>.6f}'
                            for name, value in losses_dict.items())
        self.msg += summary
        logger.info(self.msg)
        return self.total_loss, self.num_seen_utts
|
||||
|
||||
|
||||
class U2Updater(StandardUpdater):
    """Updater that runs U2 training steps with gradient accumulation.

    An optimizer/scheduler step is taken once every ``accum_grad`` calls to
    ``update_core``; ``forward_count`` tracks the position inside the current
    accumulation window.
    """

    def __init__(self,
                 model,
                 optimizer,
                 scheduler,
                 dataloader,
                 init_state=None,
                 accum_grad=1,
                 **kwargs):
        """
        Args:
            model: model to train; called as ``model(*batch[1:])``.
            optimizer: optimizer stepped once per accumulation window.
            scheduler: learning-rate scheduler stepped with the optimizer.
            dataloader: training data loader, forwarded to the base class.
            init_state: initial updater state, forwarded to the base class.
            accum_grad (int): number of forward/backward passes accumulated
                before each optimizer step.
            **kwargs: accepted but not forwarded to the base class.
        """
        super().__init__(
            model, optimizer, scheduler, dataloader, init_state=init_state)
        self.accum_grad = accum_grad
        self.forward_count = 0
        self.msg = ""

    def update_core(self, batch):
        """One Step

        Args:
            batch (List[Object]): utts, xs, xlens, ys, ylens
        """
        losses_dict = {}
        self.msg = "Rank: {}, ".format(dist.get_rank())

        # forward
        batch_size = batch[1].shape[0]
        loss, attention_loss, ctc_loss = self.model(*batch[1:])
        # Scale the loss by `accum_grad` so the gradients summed over one
        # accumulation window are averaged.
        loss /= self.accum_grad

        # loss backward
        if (self.forward_count + 1) != self.accum_grad:
            # Intermediate accumulation pass: disable gradient
            # synchronization across DDP processes when the model provides
            # `no_sync`. Gradients are accumulated on module variables and
            # synchronized on the last pass of the window.
            # A model that is not wrapped for data-parallel training has no
            # `no_sync` attribute, so fall back to a no-op context instead of
            # raising AttributeError.
            context = getattr(self.model, "no_sync", nullcontext)
        else:
            # Used for single gpu training and DDP gradient synchronization
            # processes.
            context = nullcontext

        with context():
            loss.backward()
            layer_tools.print_grads(self.model, print_func=None)

        # loss info (undo the accumulation scaling for reporting)
        losses_dict['loss'] = float(loss) * self.accum_grad
        if attention_loss:
            losses_dict['att_loss'] = float(attention_loss)
        if ctc_loss:
            losses_dict['ctc_loss'] = float(ctc_loss)
        # report loss
        for k, v in losses_dict.items():
            report("train/" + k, v)
        # loss msg
        self.msg += "batch size: {}, ".format(batch_size)
        self.msg += "accum: {}, ".format(self.accum_grad)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())

        # Truncate the graph
        # NOTE(review): the return value is discarded; presumably `detach()`
        # returns a new tensor and this call has no effect -- confirm.
        loss.detach()

        # update parameters
        self.forward_count += 1
        if self.forward_count != self.accum_grad:
            # Still inside the accumulation window: no optimizer step yet.
            return
        self.forward_count = 0

        self.optimizer.step()
        self.optimizer.clear_grad()
        self.scheduler.step()

    def update(self):
        """Read one batch, run ``update_core`` and advance the state.

        The iteration counter only advances when an accumulation window has
        just completed (``forward_count`` was reset to 0); the epoch counter
        advances when the iteration count reaches a multiple of
        ``updates_per_epoch``.
        """
        # model is default in train mode

        # training for a step is implemented here
        with Timer("data time cost:{}"):
            batch = self.read_batch()
        with Timer("step time cost:{}"):
            self.update_core(batch)

        # #iterations with accum_grad > 1
        # Ref.: https://github.com/espnet/espnet/issues/777
        if self.forward_count == 0:
            self.state.iteration += 1
            if self.updates_per_epoch is not None:
                if self.state.iteration % self.updates_per_epoch == 0:
                    self.state.epoch += 1
|
Loading…
Reference in new issue