commit
84020a0471
@ -0,0 +1,219 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""Contains U2 model."""
|
||||||
|
import paddle
|
||||||
|
from paddle import distributed as dist
|
||||||
|
from paddle.io import DataLoader
|
||||||
|
|
||||||
|
from deepspeech.io.collator import SpeechCollator
|
||||||
|
from deepspeech.io.dataset import ManifestDataset
|
||||||
|
from deepspeech.io.sampler import SortagradBatchSampler
|
||||||
|
from deepspeech.io.sampler import SortagradDistributedBatchSampler
|
||||||
|
from deepspeech.models.u2 import U2Evaluator
|
||||||
|
from deepspeech.models.u2 import U2Model
|
||||||
|
from deepspeech.models.u2 import U2Updater
|
||||||
|
from deepspeech.training.extensions.snapshot import Snapshot
|
||||||
|
from deepspeech.training.extensions.visualizer import VisualDL
|
||||||
|
from deepspeech.training.optimizer import OptimizerFactory
|
||||||
|
from deepspeech.training.scheduler import LRSchedulerFactory
|
||||||
|
from deepspeech.training.timer import Timer
|
||||||
|
from deepspeech.training.trainer import Trainer
|
||||||
|
from deepspeech.training.updaters.trainer import Trainer as NewTrainer
|
||||||
|
from deepspeech.utils import layer_tools
|
||||||
|
from deepspeech.utils.log import Log
|
||||||
|
|
||||||
|
logger = Log(__name__).getlog()
|
||||||
|
|
||||||
|
|
||||||
|
class U2Trainer(Trainer):
    """Experiment driver for the U2 model built on the updater/extension loop.

    It creates the train/valid/test/align data loaders, the U2 model with its
    optimizer and learning-rate scheduler, and a ``NewTrainer`` that is driven
    by ``U2Updater`` and extended with evaluation, VisualDL and snapshots.
    """

    def __init__(self, config, args):
        super().__init__(config, args)

    def setup_dataloader(self):
        """Build ``train_loader``, ``valid_loader``, ``test_loader`` and ``align_loader``.

        All changes are made on a defrosted clone of ``self.config``, so the
        trainer's own config object is not modified here.
        """
        config = self.config.clone()
        config.defrost()
        config.collator.keep_transcription_text = False

        # train/valid dataset, return token ids
        config.data.manifest = config.data.train_manifest
        train_dataset = ManifestDataset.from_config(config)

        config.data.manifest = config.data.dev_manifest
        dev_dataset = ManifestDataset.from_config(config)

        # The train collator is created before augmentation is cleared, so it
        # is the only collator that sees the configured augmentation.
        collate_fn_train = SpeechCollator.from_config(config)

        config.collator.augmentation_config = ""
        collate_fn_dev = SpeechCollator.from_config(config)

        if self.parallel:
            batch_sampler = SortagradDistributedBatchSampler(
                train_dataset,
                batch_size=config.collator.batch_size,
                num_replicas=None,
                rank=None,
                shuffle=True,
                drop_last=True,
                sortagrad=config.collator.sortagrad,
                shuffle_method=config.collator.shuffle_method)
        else:
            batch_sampler = SortagradBatchSampler(
                train_dataset,
                shuffle=True,
                batch_size=config.collator.batch_size,
                drop_last=True,
                sortagrad=config.collator.sortagrad,
                shuffle_method=config.collator.shuffle_method)

        self.train_loader = DataLoader(
            train_dataset,
            batch_sampler=batch_sampler,
            collate_fn=collate_fn_train,
            num_workers=config.collator.num_workers)
        self.valid_loader = DataLoader(
            dev_dataset,
            batch_size=config.collator.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=collate_fn_dev)

        # test dataset, return raw text
        config.data.manifest = config.data.test_manifest
        # Open every length/ratio bound to its widest value (0 .. inf) for the
        # test set. NOTE(review): presumably this disables example filtering in
        # ManifestDataset so no test utterance is dropped -- confirm there.
        config.data.min_input_len = 0.0  # second
        config.data.max_input_len = float('inf')  # second
        config.data.min_output_len = 0.0  # tokens
        config.data.max_output_len = float('inf')  # tokens
        config.data.min_output_input_ratio = 0.00
        config.data.max_output_input_ratio = float('inf')

        test_dataset = ManifestDataset.from_config(config)

        # test loader: collator keeps the transcription text
        config.collator.keep_transcription_text = True
        config.collator.augmentation_config = ""
        self.test_loader = DataLoader(
            test_dataset,
            batch_size=config.decoding.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=SpeechCollator.from_config(config))

        # align loader: same dataset, collator does not keep transcription text
        config.collator.keep_transcription_text = False
        self.align_loader = DataLoader(
            test_dataset,
            batch_size=config.decoding.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=SpeechCollator.from_config(config))
        logger.info("Setup train/valid/test/align Dataloader!")

    def setup_model(self):
        """Create the U2 model, LR scheduler and optimizer.

        Sets ``self.model``, ``self.optimizer`` and ``self.lr_scheduler``.
        Requires ``self.train_loader`` to exist, because the model input and
        output sizes are read from its collate function.
        """
        config = self.config
        model_conf = config.model

        # This is the trainer's own config node (not a clone): the sizes
        # written here stay on ``self.config.model``.
        model_conf.defrost()
        model_conf.input_dim = self.train_loader.collate_fn.feature_size
        model_conf.output_dim = self.train_loader.collate_fn.vocab_size
        model_conf.freeze()
        model = U2Model.from_config(model_conf)

        if self.parallel:
            model = paddle.DataParallel(model)

        model.train()
        logger.info(f"{model}")
        layer_tools.print_params(model, logger.info)

        train_config = config.training
        optim_type = train_config.optim
        optim_conf = train_config.optim_conf
        scheduler_type = train_config.scheduler
        scheduler_conf = train_config.scheduler_conf

        scheduler_args = {
            "learning_rate": optim_conf.lr,
            "verbose": False,
            "warmup_steps": scheduler_conf.warmup_steps,
            "gamma": scheduler_conf.lr_decay,
            "d_model": model_conf.encoder_conf.output_size,
        }
        lr_scheduler = LRSchedulerFactory.from_args(scheduler_type,
                                                    scheduler_args)

        is_noam = optim_type == 'noam'
        optimizer_args = {
            "grad_clip": train_config.global_grad_clip,
            "weight_decay": optim_conf.weight_decay,
            # Fall back to the plain learning rate if no scheduler was built.
            "learning_rate": lr_scheduler if lr_scheduler else optim_conf.lr,
            "parameters": model.parameters(),
            "epsilon": 1e-9 if is_noam else None,
            "beta1": 0.9 if is_noam else None,
            # Paddle's Adam-style optimizers name this argument ``beta2``;
            # a misspelled key cannot match it.
            "beta2": 0.98 if is_noam else None,
        }
        optimizer = OptimizerFactory.from_args(optim_type, optimizer_args)

        self.model = model
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        logger.info("Setup model/optimizer/lr_scheduler!")

    def setup_updater(self):
        """Assemble the updater-based trainer and store it in ``self.trainer``.

        Evaluation runs every epoch on all ranks; VisualDL logging and k-best
        snapshots are registered only on rank 0.
        """
        output_dir = self.output_dir
        config = self.config.training

        updater = U2Updater(
            model=self.model,
            optimizer=self.optimizer,
            scheduler=self.lr_scheduler,
            dataloader=self.train_loader,
            output_dir=output_dir,
            accum_grad=config.accum_grad)

        trainer = NewTrainer(updater, (config.n_epoch, 'epoch'), output_dir)

        evaluator = U2Evaluator(self.model, self.valid_loader)

        trainer.extend(evaluator, trigger=(1, "epoch"))

        if dist.get_rank() == 0:
            trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
            num_snapshots = config.checkpoint.kbest_n
            trainer.extend(
                Snapshot(
                    mode='kbest',
                    max_size=num_snapshots,
                    indicator='VALID/LOSS',
                    less_better=True),
                trigger=(1, 'epoch'))
        self.trainer = trainer

    def run(self):
        """The routine of the experiment after setup. This method is intended
        to be used by the user.
        """
        self.setup_updater()
        with Timer("Training Done: {}"):
            self.trainer.run()
|
@ -0,0 +1,19 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
from .u2 import U2InferModel
|
||||||
|
from .u2 import U2Model
|
||||||
|
from .updater import U2Evaluator
|
||||||
|
from .updater import U2Updater
|
||||||
|
|
||||||
|
__all__ = ["U2Model", "U2InferModel", "U2Evaluator", "U2Updater"]
|
@ -0,0 +1,149 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
from contextlib import nullcontext
|
||||||
|
|
||||||
|
import paddle
|
||||||
|
from paddle import distributed as dist
|
||||||
|
|
||||||
|
from deepspeech.training.extensions.evaluator import StandardEvaluator
|
||||||
|
from deepspeech.training.reporter import report
|
||||||
|
from deepspeech.training.timer import Timer
|
||||||
|
from deepspeech.training.updaters.standard_updater import StandardUpdater
|
||||||
|
from deepspeech.utils import layer_tools
|
||||||
|
from deepspeech.utils.log import Log
|
||||||
|
|
||||||
|
logger = Log(__name__).getlog()
|
||||||
|
|
||||||
|
|
||||||
|
class U2Evaluator(StandardEvaluator):
    """Evaluator that runs the U2 model on validation batches and reports losses.

    It keeps running totals across batches: ``num_seen_utts`` (number of
    utterances with a finite loss) and ``total_loss`` (sum of per-batch loss
    multiplied by the batch's utterance count).
    """

    def __init__(self, model, dataloader):
        super().__init__(model, dataloader)
        self.msg = ""
        self.num_seen_utts = 0
        self.total_loss = 0.0

    def evaluate_core(self, batch):
        """Evaluate one batch.

        Args:
            batch: sequence whose first item is skipped; the rest is passed
                positionally to the model. ``batch[1].shape[0]`` is used as
                the number of utterances.

        Returns:
            Tuple ``(total_loss, num_seen_utts)`` accumulated so far.
        """
        self.msg = "Valid: Rank: {}, ".format(dist.get_rank())
        losses_dict = {}

        loss, attention_loss, ctc_loss = self.model(*batch[1:])
        # Only finite losses contribute to the running totals and the report.
        if paddle.isfinite(loss):
            num_utts = batch[1].shape[0]
            self.num_seen_utts += num_utts
            self.total_loss += float(loss) * num_utts

            losses_dict['loss'] = float(loss)
            # Test for presence, not truthiness: a loss that is exactly zero
            # is still a value worth reporting.
            if attention_loss is not None:
                losses_dict['att_loss'] = float(attention_loss)
            if ctc_loss is not None:
                losses_dict['ctc_loss'] = float(ctc_loss)

            for k, v in losses_dict.items():
                report("eval/" + k, v)

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        logger.info(self.msg)
        return self.total_loss, self.num_seen_utts
|
||||||
|
|
||||||
|
|
||||||
|
class U2Updater(StandardUpdater):
    """Updater for the U2 model with gradient accumulation.

    Gradients from ``accum_grad`` consecutive forward/backward passes are
    accumulated before one optimizer step, one gradient clear and one
    scheduler step are performed.
    """

    def __init__(self,
                 model,
                 optimizer,
                 scheduler,
                 dataloader,
                 init_state=None,
                 accum_grad=1,
                 **kwargs):
        """
        Args:
            model: model called as ``model(*batch[1:])``; returns
                ``(loss, attention_loss, ctc_loss)``.
            optimizer: optimizer stepped once per accumulation cycle.
            scheduler: LR scheduler stepped once per accumulation cycle.
            dataloader: source of training batches.
            init_state: forwarded to ``StandardUpdater``.
            accum_grad: number of forward passes per optimizer step.
            **kwargs: accepted and ignored (e.g. ``output_dir``).
        """
        super().__init__(
            model, optimizer, scheduler, dataloader, init_state=init_state)
        self.accum_grad = accum_grad
        self.forward_count = 0
        self.msg = ""

    def update_core(self, batch):
        """One Step

        Args:
            batch (List[Object]): utts, xs, xlens, ys, ylens
        """
        losses_dict = {}
        self.msg = "Rank: {}, ".format(dist.get_rank())

        # forward
        batch_size = batch[1].shape[0]
        loss, attention_loss, ctc_loss = self.model(*batch[1:])
        # Scale the loss by the number of accumulated passes.
        loss /= self.accum_grad

        # loss backward
        is_accumulating = (self.forward_count + 1) != self.accum_grad
        if is_accumulating and hasattr(self.model, "no_sync"):
            # Disable gradient synchronizations across DDP processes.
            # Within this context, gradients will be accumulated on module
            # variables, which will later be synchronized.
            context = self.model.no_sync
        else:
            # Final pass of the cycle, or a model without ``no_sync``
            # (e.g. not wrapped for data-parallel training).
            context = nullcontext

        with context():
            loss.backward()
            layer_tools.print_grads(self.model, print_func=None)

        # loss info; undo the accumulation scaling for reporting
        losses_dict['loss'] = float(loss) * self.accum_grad
        # Test for presence, not truthiness, so a zero loss is still reported.
        if attention_loss is not None:
            losses_dict['att_loss'] = float(attention_loss)
        if ctc_loss is not None:
            losses_dict['ctc_loss'] = float(ctc_loss)
        # report loss
        for k, v in losses_dict.items():
            report("train/" + k, v)
        # loss msg
        self.msg += "batch size: {}, ".format(batch_size)
        self.msg += "accum: {}, ".format(self.accum_grad)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())

        # update parameters only once every ``accum_grad`` passes
        self.forward_count += 1
        if self.forward_count != self.accum_grad:
            return
        self.forward_count = 0

        self.optimizer.step()
        self.optimizer.clear_grad()
        self.scheduler.step()

    def update(self):
        """Read one batch, run ``update_core`` and advance the iteration/epoch state."""
        # model is default in train mode

        # training for a step is implemented here
        with Timer("data time cost:{}"):
            batch = self.read_batch()
        with Timer("step time cost:{}"):
            self.update_core(batch)

        # #iterations with accum_grad > 1
        # Ref.: https://github.com/espnet/espnet/issues/777
        # ``forward_count`` is reset to 0 only after an optimizer step, so an
        # iteration is counted per parameter update, not per forward pass.
        if self.forward_count == 0:
            self.state.iteration += 1
            if self.updates_per_epoch is not None:
                if self.state.iteration % self.updates_per_epoch == 0:
                    self.state.epoch += 1
|
@ -0,0 +1,50 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
import datetime
|
||||||
|
import time
|
||||||
|
|
||||||
|
from deepspeech.utils.log import Log
|
||||||
|
|
||||||
|
__all__ = ["Timer"]
|
||||||
|
|
||||||
|
logger = Log(__name__).getlog()
|
||||||
|
|
||||||
|
|
||||||
|
class Timer():
    """Context manager that times the body of a ``with`` statement.

    Usage:
        with Timer("Message: {}") as timer:
            do some thing

    On exit, if a message was given, it is formatted with the elapsed time
    (as a ``datetime.timedelta`` string) and logged at INFO level.
    """

    def __init__(self, message=None):
        self.message = message

    def duration(self) -> str:
        """Return the time elapsed since ``__enter__`` as a timedelta string."""
        delta = datetime.timedelta(seconds=time.time() - self.start)
        return str(delta)

    def __enter__(self):
        # Record the wall-clock start; all other methods measure from here.
        self.start = time.time()
        return self

    def __exit__(self, type, value, traceback):
        # Nothing to log without a message template.
        if not self.message:
            return
        logger.info(self.message.format(self.duration()))

    def __call__(self) -> float:
        """Return the seconds elapsed since ``__enter__`` as a float."""
        now = time.time()
        return now - self.start

    def __str__(self):
        return self.duration()
|
Before Width: | Height: | Size: 206 KiB |
Before Width: | Height: | Size: 108 KiB |
@ -1,16 +0,0 @@
|
|||||||
# Benchmarks
|
|
||||||
|
|
||||||
## Acceleration with Multi-GPUs
|
|
||||||
|
|
||||||
We compare the training time with 1, 2, 4, 8 Tesla V100 GPUs (with a subset of LibriSpeech samples whose audio durations are between 6.0 and 7.0 seconds). And it shows that a **near-linear** acceleration with multiple GPUs has been achieved. In the following figure, the time (in seconds) cost for training is printed on the blue bars.
|
|
||||||
|
|
||||||
<img src="../images/multi_gpu_speedup.png" width=450>
|
|
||||||
|
|
||||||
| # of GPU | Acceleration Rate |
|
|
||||||
| -------- | --------------: |
|
|
||||||
| 1 | 1.00 X |
|
|
||||||
| 2 | 1.98 X |
|
|
||||||
| 4 | 3.73 X |
|
|
||||||
| 8 | 6.95 X |
|
|
||||||
|
|
||||||
`utils/profile.sh` provides such a demo profiling tool; you can change it as needed.
|
|
Before Width: | Height: | Size: 93 KiB After Width: | Height: | Size: 93 KiB |
Before Width: | Height: | Size: 93 KiB After Width: | Height: | Size: 93 KiB |
@ -1,20 +0,0 @@
|
|||||||
#!/bin/bash

source path.sh

# Prerequisites when running on MacOS:
#   brew install portaudio
#   pip install pyaudio
#   pip install keyboard

# Start the demo client against the local server; report and fail if it
# exits with a non-zero status.
if ! python3 -u ${BIN_DIR}/deploy/client.py --host_ip="localhost" --host_port=8086; then
    echo "Failed in starting demo client!"
    exit 1
fi

exit 0
|
|
@ -1,40 +0,0 @@
|
|||||||
#!/bin/bash
# TODO: replace the model with a mandarin model

# Exactly one argument is required: the checkpoint path handed to the server.
if [[ $# != 1 ]]; then
    # $0 is the script name; $1 would be the (missing) argument itself.
    echo "usage: $0 checkpoint_path"
    exit -1
fi

source path.sh

# download language model
bash local/download_lm_ch.sh
if [ $? -ne 0 ]; then
    exit 1
fi

# download well-trained model
#bash local/download_model.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

# start demo server
# The checkpoint path is quoted so a path containing spaces stays one argument.
CUDA_VISIBLE_DEVICES=0 \
python3 -u ${BIN_DIR}/deploy/server.py \
--device 'gpu' \
--nproc 1 \
--config conf/deepspeech2.yaml \
--host_ip="localhost" \
--host_port=8086 \
--speech_save_dir="demo_cache" \
--checkpoint_path "${1}"

if [ $? -ne 0 ]; then
    echo "Failed in starting demo server!"
    exit 1
fi


exit 0
|
|
@ -1,28 +0,0 @@
|
|||||||
#!/bin/bash

# Exactly one argument is required: the checkpoint path passed to tune.py.
# Without this check a missing argument leaves --checkpoint_path without a value.
if [ $# != 1 ]; then
    echo "usage: $0 ckpt_path"
    exit 1
fi

# grid-search for hyper-parameters in language model
# The checkpoint path is quoted so a path containing spaces stays one argument.
python3 -u ${BIN_DIR}/tune.py \
--device 'gpu' \
--nproc 1 \
--config conf/deepspeech2.yaml \
--num_batches=10 \
--batch_size=128 \
--beam_size=300 \
--num_proc_bsearch=8 \
--num_alphas=10 \
--num_betas=10 \
--alpha_from=0.0 \
--alpha_to=5.0 \
--beta_from=-6 \
--beta_to=6 \
--cutoff_prob=1.0 \
--cutoff_top_n=40 \
--checkpoint_path "${1}"

if [ $? -ne 0 ]; then
    echo "Failed in tuning!"
    exit 1
fi


exit 0
|
|
@ -0,0 +1,58 @@
|
|||||||
|
# [CC-CEDICT](https://cc-cedict.org/wiki/)
|
||||||
|
|
||||||
|
What is CC-CEDICT?
|
||||||
|
CC-CEDICT is a continuation of the CEDICT project.
|
||||||
|
The objective of the CEDICT project was to create an online, downloadable (as opposed to searchable-only) public-domain Chinese-English dictionary.
|
||||||
|
CEDICT was started by Paul Andrew Denisowski in October 1997.
|
||||||
|
For the most part, the project is modeled on Jim Breen's highly successful EDICT (Japanese-English dictionary) project and is intended to be a collaborative effort,
|
||||||
|
with users providing entries and corrections to the main file.
|
||||||
|
|
||||||
|
|
||||||
|
## Parse CC-CEDICT to Json format
|
||||||
|
|
||||||
|
1. Parse to Json
|
||||||
|
|
||||||
|
```
|
||||||
|
run.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Result
|
||||||
|
|
||||||
|
```
|
||||||
|
exp/
|
||||||
|
|-- cedict
|
||||||
|
`-- cedict.json
|
||||||
|
|
||||||
|
0 directories, 2 files
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
4c4bffc84e24467fe1b2ea9ba37ed6b6 exp/cedict
|
||||||
|
3adf504dacd13886f88cc9fe3b37c75d exp/cedict.json
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
==> exp/cedict <==
|
||||||
|
# CC-CEDICT
|
||||||
|
# Community maintained free Chinese-English dictionary.
|
||||||
|
#
|
||||||
|
# Published by MDBG
|
||||||
|
#
|
||||||
|
# License:
|
||||||
|
# Creative Commons Attribution-ShareAlike 4.0 International License
|
||||||
|
# https://creativecommons.org/licenses/by-sa/4.0/
|
||||||
|
#
|
||||||
|
# Referenced works:
|
||||||
|
|
||||||
|
==> exp/cedict.json <==
|
||||||
|
{"traditional": "2019\u51a0\u72c0\u75c5\u6bd2\u75c5", "simplified": "2019\u51a0\u72b6\u75c5\u6bd2\u75c5", "pinyin": "er4 ling2 yi1 jiu3 guan1 zhuang4 bing4 du2 bing4", "english": "COVID-19, the coronavirus disease identified in 2019"}
|
||||||
|
{"traditional": "21\u4e09\u9ad4\u7d9c\u5408\u75c7", "simplified": "21\u4e09\u4f53\u7efc\u5408\u75c7", "pinyin": "er4 shi2 yi1 san1 ti3 zong1 he2 zheng4", "english": "trisomy"}
|
||||||
|
{"traditional": "3C", "simplified": "3C", "pinyin": "san1 C", "english": "abbr. for computers, communications, and consumer electronics"}
|
||||||
|
{"traditional": "3P", "simplified": "3P", "pinyin": "san1 P", "english": "(slang) threesome"}
|
||||||
|
{"traditional": "3Q", "simplified": "3Q", "pinyin": "san1 Q", "english": "(Internet slang) thank you (loanword)"}
|
||||||
|
{"traditional": "421", "simplified": "421", "pinyin": "si4 er4 yi1", "english": "four grandparents, two parents and an only child"}
|
||||||
|
{"traditional": "502\u81a0", "simplified": "502\u80f6", "pinyin": "wu3 ling2 er4 jiao1", "english": "cyanoacrylate glue"}
|
||||||
|
{"traditional": "88", "simplified": "88", "pinyin": "ba1 ba1", "english": "(Internet slang) bye-bye (alternative for \u62dc\u62dc[bai2 bai2])"}
|
||||||
|
{"traditional": "996", "simplified": "996", "pinyin": "jiu3 jiu3 liu4", "english": "9am-9pm, six days a week (work schedule)"}
|
||||||
|
{"traditional": "A", "simplified": "A", "pinyin": "A", "english": "(slang) (Tw) to steal"}
|
||||||
|
```
|
@ -1,5 +0,0 @@
|
|||||||
# Download Baker dataset
|
|
||||||
|
|
||||||
The Baker dataset has to be downloaded manually and moved to 'data/', because you have to pass a CAPTCHA in a browser to download the dataset.
|
|
||||||
|
|
||||||
Download URL https://test.data-baker.com/#/data/index/source.
|
|
@ -0,0 +1,3 @@
|
|||||||
|
# G2P
|
||||||
|
|
||||||
|
* zh - Chinese G2P
|
@ -1,4 +1,4 @@
|
|||||||
export MAIN_ROOT=`realpath ${PWD}/../../`
|
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
||||||
export LC_ALL=C
|
export LC_ALL=C
|
@ -1,10 +1,17 @@
|
|||||||
# LibriSpeech
|
# LibriSpeech
|
||||||
|
|
||||||
|
## Data
|
||||||
|
| Data Subset | Duration in Seconds |
|
||||||
|
| --- | --- |
|
||||||
|
| data/manifest.train | 0.83s ~ 29.735s |
|
||||||
|
| data/manifest.dev | 1.065s ~ 35.155s |
|
||||||
|
| data/manifest.test-clean | 1.285s ~ 34.955s |
|
||||||
|
|
||||||
## Deepspeech2
|
## Deepspeech2
|
||||||
|
|
||||||
| Model | Params | release | Config | Test set | Loss | WER |
|
| Model | Params | release | Config | Test set | Loss | WER |
|
||||||
| --- | --- | --- | --- | --- | --- | --- |
|
| --- | --- | --- | --- | --- | --- | --- |
|
||||||
| DeepSpeech2 | 42.96M | 2.2.0 | conf/deepspeech2.yaml + spec_aug | 14.49190807 | test-clean | 0.067283 |
|
| DeepSpeech2 | 42.96M | 2.2.0 | conf/deepspeech2.yaml + spec_aug | test-clean | 14.49190807 | 0.067283 |
|
||||||
| DeepSpeech2 | 42.96M | 2.1.0 | conf/deepspeech2.yaml | 15.184467315673828 | test-clean | 0.072154 |
|
| DeepSpeech2 | 42.96M | 2.1.0 | conf/deepspeech2.yaml | test-clean | 15.184467315673828 | 0.072154 |
|
||||||
| DeepSpeech2 | 42.96M | 2.0.0 | conf/deepspeech2.yaml | - | test-clean | 0.073973 |
|
| DeepSpeech2 | 42.96M | 2.0.0 | conf/deepspeech2.yaml | test-clean | - | 0.073973 |
|
||||||
| DeepSpeech2 | 42.96M | 1.8.5 | - | test-clean | - | 0.074939 |
|
| DeepSpeech2 | 42.96M | 1.8.5 | - | test-clean | - | 0.074939 |
|
||||||
|
@ -1,33 +0,0 @@
|
|||||||
#!/bin/bash

# Require exactly one argument: the checkpoint path.
[ $# != 1 ] && { echo "usage: tune ckpt_path"; exit 1; }

# Grid-search the language-model hyper-parameters; report and fail if
# tune.py exits with a non-zero status.
if ! python3 -u ${BIN_DIR}/tune.py \
    --device 'gpu' \
    --nproc 1 \
    --config conf/deepspeech2.yaml \
    --num_batches=-1 \
    --batch_size=128 \
    --beam_size=500 \
    --num_proc_bsearch=12 \
    --num_alphas=45 \
    --num_betas=8 \
    --alpha_from=1.0 \
    --alpha_to=3.2 \
    --beta_from=0.1 \
    --beta_to=0.45 \
    --cutoff_prob=1.0 \
    --cutoff_top_n=40 \
    --checkpoint_path ${1}
then
    echo "Failed in tuning!"
    exit 1
fi

exit 0
|
|
@ -0,0 +1,3 @@
|
|||||||
|
# Ngram LM
|
||||||
|
|
||||||
|
* s0 - kenlm ngram lm
|
@ -0,0 +1 @@
|
|||||||
|
data/lm
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue