parent
91bc5959a9
commit
3843372958
@ -0,0 +1,219 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""Contains U2 model."""
|
||||||
|
import paddle
|
||||||
|
from paddle import distributed as dist
|
||||||
|
from paddle.io import DataLoader
|
||||||
|
|
||||||
|
from deepspeech.io.collator import SpeechCollator
|
||||||
|
from deepspeech.io.dataset import ManifestDataset
|
||||||
|
from deepspeech.io.sampler import SortagradBatchSampler
|
||||||
|
from deepspeech.io.sampler import SortagradDistributedBatchSampler
|
||||||
|
from deepspeech.models.u2 import U2Evaluator
|
||||||
|
from deepspeech.models.u2 import U2Model
|
||||||
|
from deepspeech.models.u2 import U2Updater
|
||||||
|
from deepspeech.training.extensions.snapshot import Snapshot
|
||||||
|
from deepspeech.training.extensions.visualizer import VisualDL
|
||||||
|
from deepspeech.training.optimizer import OptimizerFactory
|
||||||
|
from deepspeech.training.scheduler import LRSchedulerFactory
|
||||||
|
from deepspeech.training.timer import Timer
|
||||||
|
from deepspeech.training.trainer import Trainer
|
||||||
|
from deepspeech.training.updaters.trainer import Trainer as NewTrainer
|
||||||
|
from deepspeech.utils import layer_tools
|
||||||
|
from deepspeech.utils.log import Log
|
||||||
|
|
||||||
|
# Module-level logger, obtained through the project's Log helper and keyed by
# this module's name.
logger = Log(__name__).getlog()
|
||||||
|
|
||||||
|
|
||||||
|
class U2Trainer(Trainer):
    """Experiment driver that wires up data, model and the updater-based trainer.

    The setup methods populate ``self.train_loader``, ``self.valid_loader``,
    ``self.test_loader``, ``self.align_loader``, ``self.model``,
    ``self.optimizer``, ``self.lr_scheduler`` and ``self.trainer``.
    """

    def __init__(self, config, args):
        """Forward ``config`` and ``args`` unchanged to the base ``Trainer``."""
        super().__init__(config, args)

    def setup_dataloader(self):
        """Create the train/valid/test/align data loaders.

        Works on a defrosted clone of ``self.config`` so the mutations made
        here (manifest path, collator flags, length bounds) do not touch the
        trainer's own config object.
        """
        config = self.config.clone()
        config.defrost()
        config.collator.keep_transcription_text = False

        # train/valid dataset, return token ids
        config.data.manifest = config.data.train_manifest
        train_dataset = ManifestDataset.from_config(config)

        config.data.manifest = config.data.dev_manifest
        dev_dataset = ManifestDataset.from_config(config)

        # The train collator is built before the augmentation config is
        # cleared, so only the training loader keeps the configured
        # augmentation.
        collate_fn_train = SpeechCollator.from_config(config)

        config.collator.augmentation_config = ""
        collate_fn_dev = SpeechCollator.from_config(config)

        if self.parallel:
            batch_sampler = SortagradDistributedBatchSampler(
                train_dataset,
                batch_size=config.collator.batch_size,
                num_replicas=None,
                rank=None,
                shuffle=True,
                drop_last=True,
                sortagrad=config.collator.sortagrad,
                shuffle_method=config.collator.shuffle_method)
        else:
            batch_sampler = SortagradBatchSampler(
                train_dataset,
                shuffle=True,
                batch_size=config.collator.batch_size,
                drop_last=True,
                sortagrad=config.collator.sortagrad,
                shuffle_method=config.collator.shuffle_method)
        self.train_loader = DataLoader(
            train_dataset,
            batch_sampler=batch_sampler,
            collate_fn=collate_fn_train,
            num_workers=config.collator.num_workers, )
        self.valid_loader = DataLoader(
            dev_dataset,
            batch_size=config.collator.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=collate_fn_dev)

        # test dataset, return raw text
        config.data.manifest = config.data.test_manifest
        # The length/ratio bounds are opened to their widest values
        # (0 .. inf) before the test dataset is built.
        # NOTE(review): presumably this keeps every test utterance (no
        # filtering) -- confirm against ManifestDataset.
        config.data.min_input_len = 0.0  # second
        config.data.max_input_len = float('inf')  # second
        config.data.min_output_len = 0.0  # tokens
        config.data.max_output_len = float('inf')  # tokens
        config.data.min_output_input_ratio = 0.00
        config.data.max_output_input_ratio = float('inf')

        test_dataset = ManifestDataset.from_config(config)
        # return text ord id
        config.collator.keep_transcription_text = True
        config.collator.augmentation_config = ""
        self.test_loader = DataLoader(
            test_dataset,
            batch_size=config.decoding.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=SpeechCollator.from_config(config))
        # return text token id
        config.collator.keep_transcription_text = False
        self.align_loader = DataLoader(
            test_dataset,
            batch_size=config.decoding.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=SpeechCollator.from_config(config))
        logger.info("Setup train/valid/test/align Dataloader!")

    def setup_model(self):
        """Build the U2 model, its LR scheduler and its optimizer.

        Must run after ``setup_dataloader``: the model's input/output
        dimensions are read from the training collator.
        """
        config = self.config
        model_conf = config.model
        model_conf.defrost()
        model_conf.input_dim = self.train_loader.collate_fn.feature_size
        model_conf.output_dim = self.train_loader.collate_fn.vocab_size
        model_conf.freeze()
        model = U2Model.from_config(model_conf)

        if self.parallel:
            model = paddle.DataParallel(model)

        model.train()
        logger.info(f"{model}")
        layer_tools.print_params(model, logger.info)

        train_config = config.training
        optim_type = train_config.optim
        optim_conf = train_config.optim_conf
        scheduler_type = train_config.scheduler
        scheduler_conf = train_config.scheduler_conf

        scheduler_args = {
            "learning_rate": optim_conf.lr,
            "verbose": False,
            "warmup_steps": scheduler_conf.warmup_steps,
            "gamma": scheduler_conf.lr_decay,
            "d_model": model_conf.encoder_conf.output_size,
        }
        lr_scheduler = LRSchedulerFactory.from_args(scheduler_type,
                                                    scheduler_args)

        def optimizer_args(
                config,
                parameters,
                lr_scheduler=None, ):
            """Collect the keyword arguments handed to ``OptimizerFactory``."""
            train_config = config.training
            optim_type = train_config.optim
            optim_conf = train_config.optim_conf
            # NOTE(review): 'noam' is compared against the optimizer name
            # here; it reads like a scheduler name -- confirm this is the
            # intended variable.
            is_noam = optim_type == 'noam'
            return {
                "grad_clip": train_config.global_grad_clip,
                "weight_decay": optim_conf.weight_decay,
                "learning_rate": lr_scheduler
                if lr_scheduler else optim_conf.lr,
                "parameters": parameters,
                "epsilon": 1e-9 if is_noam else None,
                "beta1": 0.9 if is_noam else None,
                # Was misspelled "beat2", so the value could never reach an
                # optimizer parameter named beta2.
                "beta2": 0.98 if is_noam else None,
            }

        optim_args = optimizer_args(config, model.parameters(), lr_scheduler)
        optimizer = OptimizerFactory.from_args(optim_type, optim_args)

        self.model = model
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        logger.info("Setup model/optimizer/lr_scheduler!")

    def setup_updater(self):
        """Assemble the updater, the extension-based trainer and its extensions.

        The result is stored in ``self.trainer``. The evaluator runs on every
        rank once per epoch; the VisualDL and k-best snapshot extensions are
        registered on rank 0 only.
        """
        output_dir = self.output_dir
        config = self.config.training

        updater = U2Updater(
            model=self.model,
            optimizer=self.optimizer,
            scheduler=self.lr_scheduler,
            dataloader=self.train_loader,
            output_dir=output_dir,
            accum_grad=config.accum_grad)

        trainer = NewTrainer(updater, (config.n_epoch, 'epoch'), output_dir)

        evaluator = U2Evaluator(self.model, self.valid_loader)

        trainer.extend(evaluator, trigger=(1, "epoch"))

        if dist.get_rank() == 0:
            trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
            num_snapshots = config.checkpoint.kbest_n
            trainer.extend(
                Snapshot(
                    mode='kbest',
                    max_size=num_snapshots,
                    indicator='VALID/LOSS',
                    less_better=True),
                trigger=(1, 'epoch'))
        self.trainer = trainer

    def run(self):
        """The routine of the experiment after setup. This method is intended
        to be used by the user.

        Builds the updater/trainer via ``setup_updater`` and runs the training
        loop inside a ``Timer``.
        """
        self.setup_updater()
        with Timer("Training Done: {}"):
            self.trainer.run()
|
@ -0,0 +1,19 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
from .u2 import U2InferModel
|
||||||
|
from .u2 import U2Model
|
||||||
|
from .updater import U2Evaluator
|
||||||
|
from .updater import U2Updater
|
||||||
|
|
||||||
|
# Public names re-exported by this package.
__all__ = ["U2Model", "U2InferModel", "U2Evaluator", "U2Updater"]
|
@ -0,0 +1,149 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
from contextlib import nullcontext
|
||||||
|
|
||||||
|
import paddle
|
||||||
|
from paddle import distributed as dist
|
||||||
|
|
||||||
|
from deepspeech.training.extensions.evaluator import StandardEvaluator
|
||||||
|
from deepspeech.training.reporter import report
|
||||||
|
from deepspeech.training.timer import Timer
|
||||||
|
from deepspeech.training.updaters.standard_updater import StandardUpdater
|
||||||
|
from deepspeech.utils import layer_tools
|
||||||
|
from deepspeech.utils.log import Log
|
||||||
|
|
||||||
|
# Module-level logger, obtained through the project's Log helper and keyed by
# this module's name.
logger = Log(__name__).getlog()
|
||||||
|
|
||||||
|
|
||||||
|
class U2Evaluator(StandardEvaluator):
    """Evaluator that accumulates an utterance-weighted validation loss.

    ``total_loss`` and ``num_seen_utts`` are running totals updated by every
    call to ``evaluate_core``; this class never resets them itself.
    """

    def __init__(self, model, dataloader):
        """Initialise the base evaluator and zero the running totals."""
        super().__init__(model, dataloader)
        self.msg = ""
        self.num_seen_utts = 0
        self.total_loss = 0.0

    def evaluate_core(self, batch):
        """Evaluate one batch and report its losses.

        Args:
            batch: sequence whose first element is skipped; the remaining
                elements are passed positionally to the model, and
                ``batch[1].shape[0]`` is taken as the number of utterances.

        Returns:
            tuple: ``(self.total_loss, self.num_seen_utts)`` after this batch.
        """
        self.msg = "Valid: Rank: {}, ".format(dist.get_rank())
        losses_dict = {}

        loss, attention_loss, ctc_loss = self.model(*batch[1:])
        # Batches with a non-finite loss contribute nothing to the totals
        # and are not reported.
        if paddle.isfinite(loss):
            num_utts = batch[1].shape[0]
            self.num_seen_utts += num_utts
            self.total_loss += float(loss) * num_utts

            losses_dict['loss'] = float(loss)
            # Test for absence explicitly: a truthiness test would also drop
            # a loss whose value is exactly 0.0.
            if attention_loss is not None:
                losses_dict['att_loss'] = float(attention_loss)
            if ctc_loss is not None:
                losses_dict['ctc_loss'] = float(ctc_loss)

            for k, v in losses_dict.items():
                report("eval/" + k, v)

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        logger.info(self.msg)
        return self.total_loss, self.num_seen_utts
|
||||||
|
|
||||||
|
|
||||||
|
class U2Updater(StandardUpdater):
    """Updater performing U2 training steps with gradient accumulation.

    ``accum_grad`` forward/backward passes are executed per optimizer step;
    ``forward_count`` tracks the position inside the current accumulation
    window and is reset to 0 after each optimizer step.
    """

    def __init__(self,
                 model,
                 optimizer,
                 scheduler,
                 dataloader,
                 init_state=None,
                 accum_grad=1,
                 **kwargs):
        """Initialise the updater.

        Args:
            model: the network; called as ``model(*batch[1:])``.
            optimizer: stepped and cleared once per accumulation window.
            scheduler: stepped once per accumulation window.
            dataloader: forwarded to ``StandardUpdater``.
            init_state: forwarded to ``StandardUpdater``.
            accum_grad: number of batches accumulated per optimizer step.
            **kwargs: accepted and ignored (callers may pass extras such as
                ``output_dir``).
        """
        super().__init__(
            model, optimizer, scheduler, dataloader, init_state=init_state)
        self.accum_grad = accum_grad
        self.forward_count = 0
        self.msg = ""

    def update_core(self, batch):
        """One Step

        Runs forward/backward on ``batch``, reports the losses, and steps the
        optimizer and scheduler once ``accum_grad`` batches have been
        accumulated.

        Args:
            batch (List[Object]): utts, xs, xlens, ys, ylens
        """
        losses_dict = {}
        self.msg = "Rank: {}, ".format(dist.get_rank())

        # forward
        batch_size = batch[1].shape[0]
        loss, attention_loss, ctc_loss = self.model(*batch[1:])
        # Scale the loss by the number of accumulated batches.
        loss /= self.accum_grad

        # loss backward
        if (self.forward_count + 1) != self.accum_grad:
            # Disable gradient synchronizations across DDP processes.
            # Within this context, gradients will be accumulated on module
            # variables, which will later be synchronized.
            # A model that is not wrapped for data-parallel training has no
            # `no_sync` attribute; fall back to a no-op context instead of
            # raising AttributeError.
            context = getattr(self.model, "no_sync", nullcontext)
        else:
            # Used for single gpu training and DDP gradient synchronization
            # processes.
            context = nullcontext

        with context():
            loss.backward()
            layer_tools.print_grads(self.model, print_func=None)

        # loss info: undo the accumulation scaling for reporting.
        losses_dict['loss'] = float(loss) * self.accum_grad
        # Test for absence explicitly: a truthiness test would also drop a
        # loss whose value is exactly 0.0.
        if attention_loss is not None:
            losses_dict['att_loss'] = float(attention_loss)
        if ctc_loss is not None:
            losses_dict['ctc_loss'] = float(ctc_loss)
        # report loss
        for k, v in losses_dict.items():
            report("train/" + k, v)
        # loss msg
        self.msg += "batch size: {}, ".format(batch_size)
        self.msg += "accum: {}, ".format(self.accum_grad)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())

        # Truncate the graph
        # NOTE(review): the return value of detach() is discarded here --
        # confirm whether this call has any effect.
        loss.detach()

        # update parameters
        self.forward_count += 1
        if self.forward_count != self.accum_grad:
            # Still inside the accumulation window: no optimizer step yet.
            return
        self.forward_count = 0

        self.optimizer.step()
        self.optimizer.clear_grad()
        self.scheduler.step()

    def update(self):
        """Read one batch, run ``update_core`` and advance the training state.

        The iteration counter advances only when an accumulation window has
        just completed (``forward_count`` is back at 0), and the epoch counter
        advances when the iteration count reaches a multiple of
        ``updates_per_epoch``.
        """
        # model is default in train mode

        # training for a step is implemented here
        with Timer("data time cost:{}"):
            batch = self.read_batch()
        with Timer("step time cost:{}"):
            self.update_core(batch)

        # #iterations with accum_grad > 1
        # Ref.: https://github.com/espnet/espnet/issues/777
        if self.forward_count == 0:
            self.state.iteration += 1
            if self.updates_per_epoch is not None:
                if self.state.iteration % self.updates_per_epoch == 0:
                    self.state.epoch += 1
|
Loading…
Reference in new issue