commit
84020a0471
@ -0,0 +1,219 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Contains U2 model."""
|
||||
import paddle
|
||||
from paddle import distributed as dist
|
||||
from paddle.io import DataLoader
|
||||
|
||||
from deepspeech.io.collator import SpeechCollator
|
||||
from deepspeech.io.dataset import ManifestDataset
|
||||
from deepspeech.io.sampler import SortagradBatchSampler
|
||||
from deepspeech.io.sampler import SortagradDistributedBatchSampler
|
||||
from deepspeech.models.u2 import U2Evaluator
|
||||
from deepspeech.models.u2 import U2Model
|
||||
from deepspeech.models.u2 import U2Updater
|
||||
from deepspeech.training.extensions.snapshot import Snapshot
|
||||
from deepspeech.training.extensions.visualizer import VisualDL
|
||||
from deepspeech.training.optimizer import OptimizerFactory
|
||||
from deepspeech.training.scheduler import LRSchedulerFactory
|
||||
from deepspeech.training.timer import Timer
|
||||
from deepspeech.training.trainer import Trainer
|
||||
from deepspeech.training.updaters.trainer import Trainer as NewTrainer
|
||||
from deepspeech.utils import layer_tools
|
||||
from deepspeech.utils.log import Log
|
||||
|
||||
logger = Log(__name__).getlog()
|
||||
|
||||
|
||||
class U2Trainer(Trainer):
    """Experiment driver for the U2 model.

    Builds the data loaders, the model with its optimizer and learning-rate
    scheduler, and finally an updater-based training loop (``NewTrainer``)
    with evaluation, VisualDL and snapshot extensions.
    """

    def __init__(self, config, args):
        super().__init__(config, args)

    def setup_dataloader(self):
        """Create the train/valid/test/align data loaders.

        Works on a defrosted clone of ``self.config`` so the edits made here
        (manifest path, collator flags, length limits) do not touch the
        trainer's own config. Sets ``self.train_loader``,
        ``self.valid_loader``, ``self.test_loader`` and ``self.align_loader``.
        """
        config = self.config.clone()
        config.defrost()
        config.collator.keep_transcription_text = False

        # train/valid dataset, return token ids
        config.data.manifest = config.data.train_manifest
        train_dataset = ManifestDataset.from_config(config)

        config.data.manifest = config.data.dev_manifest
        dev_dataset = ManifestDataset.from_config(config)

        # The train collator is built before the augmentation config is
        # blanked, the dev collator after.
        collate_fn_train = SpeechCollator.from_config(config)

        config.collator.augmentation_config = ""
        collate_fn_dev = SpeechCollator.from_config(config)

        if self.parallel:
            batch_sampler = SortagradDistributedBatchSampler(
                train_dataset,
                batch_size=config.collator.batch_size,
                num_replicas=None,
                rank=None,
                shuffle=True,
                drop_last=True,
                sortagrad=config.collator.sortagrad,
                shuffle_method=config.collator.shuffle_method)
        else:
            batch_sampler = SortagradBatchSampler(
                train_dataset,
                shuffle=True,
                batch_size=config.collator.batch_size,
                drop_last=True,
                sortagrad=config.collator.sortagrad,
                shuffle_method=config.collator.shuffle_method)
        self.train_loader = DataLoader(
            train_dataset,
            batch_sampler=batch_sampler,
            collate_fn=collate_fn_train,
            num_workers=config.collator.num_workers, )
        self.valid_loader = DataLoader(
            dev_dataset,
            batch_size=config.collator.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=collate_fn_dev)

        # test dataset, return raw text
        config.data.manifest = config.data.test_manifest
        # The length and ratio limits are set to their widest range
        # (0 .. inf) before the test dataset is built.
        config.data.min_input_len = 0.0  # second
        config.data.max_input_len = float('inf')  # second
        config.data.min_output_len = 0.0  # tokens
        config.data.max_output_len = float('inf')  # tokens
        config.data.min_output_input_ratio = 0.00
        config.data.max_output_input_ratio = float('inf')

        test_dataset = ManifestDataset.from_config(config)
        # test loader: collator keeps the transcription text
        config.collator.keep_transcription_text = True
        config.collator.augmentation_config = ""
        self.test_loader = DataLoader(
            test_dataset,
            batch_size=config.decoding.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=SpeechCollator.from_config(config))
        # align loader: same dataset, collator does not keep the text
        config.collator.keep_transcription_text = False
        self.align_loader = DataLoader(
            test_dataset,
            batch_size=config.decoding.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=SpeechCollator.from_config(config))
        logger.info("Setup train/valid/test/align Dataloader!")

    def setup_model(self):
        """Create the model, LR scheduler and optimizer.

        Requires ``self.train_loader`` (its collator supplies the feature and
        vocabulary sizes). Sets ``self.model``, ``self.optimizer`` and
        ``self.lr_scheduler``. Note that ``self.config.model`` is updated in
        place with ``input_dim`` and ``output_dim``.
        """
        config = self.config
        model_conf = config.model
        model_conf.defrost()
        model_conf.input_dim = self.train_loader.collate_fn.feature_size
        model_conf.output_dim = self.train_loader.collate_fn.vocab_size
        model_conf.freeze()
        model = U2Model.from_config(model_conf)

        if self.parallel:
            model = paddle.DataParallel(model)

        model.train()
        logger.info(f"{model}")
        layer_tools.print_params(model, logger.info)

        train_config = config.training
        optim_type = train_config.optim
        optim_conf = train_config.optim_conf
        scheduler_type = train_config.scheduler
        scheduler_conf = train_config.scheduler_conf

        scheduler_args = {
            "learning_rate": optim_conf.lr,
            "verbose": False,
            "warmup_steps": scheduler_conf.warmup_steps,
            "gamma": scheduler_conf.lr_decay,
            "d_model": model_conf.encoder_conf.output_size,
        }
        lr_scheduler = LRSchedulerFactory.from_args(scheduler_type,
                                                    scheduler_args)

        def optimizer_args(
                config,
                parameters,
                lr_scheduler=None, ):
            """Collect the keyword arguments handed to ``OptimizerFactory``."""
            train_config = config.training
            optim_type = train_config.optim
            optim_conf = train_config.optim_conf
            return {
                "grad_clip": train_config.global_grad_clip,
                "weight_decay": optim_conf.weight_decay,
                # Prefer the scheduler; fall back to the plain LR value.
                "learning_rate": lr_scheduler
                if lr_scheduler else optim_conf.lr,
                "parameters": parameters,
                "epsilon": 1e-9 if optim_type == 'noam' else None,
                "beta1": 0.9 if optim_type == 'noam' else None,
                # Key was previously misspelled "beat2", so the value never
                # reached the optimizer under its real parameter name.
                "beta2": 0.98 if optim_type == 'noam' else None,
            }

        optim_args = optimizer_args(config, model.parameters(), lr_scheduler)
        optimizer = OptimizerFactory.from_args(optim_type, optim_args)

        self.model = model
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        logger.info("Setup model/optimizer/lr_scheduler!")

    def setup_updater(self):
        """Assemble the updater-based training loop and store it in ``self.trainer``.

        Registers the evaluator on every rank; VisualDL logging and k-best
        snapshots (ranked by ``VALID/LOSS``, lower is better) are registered
        on rank 0 only.
        """
        output_dir = self.output_dir
        config = self.config.training

        updater = U2Updater(
            model=self.model,
            optimizer=self.optimizer,
            scheduler=self.lr_scheduler,
            dataloader=self.train_loader,
            output_dir=output_dir,
            accum_grad=config.accum_grad)

        trainer = NewTrainer(updater, (config.n_epoch, 'epoch'), output_dir)

        evaluator = U2Evaluator(self.model, self.valid_loader)

        trainer.extend(evaluator, trigger=(1, "epoch"))

        if dist.get_rank() == 0:
            trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
            num_snapshots = config.checkpoint.kbest_n
            trainer.extend(
                Snapshot(
                    mode='kbest',
                    max_size=num_snapshots,
                    indicator='VALID/LOSS',
                    less_better=True),
                trigger=(1, 'epoch'))
        self.trainer = trainer

    def run(self):
        """The routine of the experiment after setup. This method is intended
        to be used by the user.
        """
        self.setup_updater()
        with Timer("Training Done: {}"):
            self.trainer.run()
|
@ -0,0 +1,19 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from .u2 import U2InferModel
|
||||
from .u2 import U2Model
|
||||
from .updater import U2Evaluator
|
||||
from .updater import U2Updater
|
||||
|
||||
__all__ = ["U2Model", "U2InferModel", "U2Evaluator", "U2Updater"]
|
@ -0,0 +1,149 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from contextlib import nullcontext
|
||||
|
||||
import paddle
|
||||
from paddle import distributed as dist
|
||||
|
||||
from deepspeech.training.extensions.evaluator import StandardEvaluator
|
||||
from deepspeech.training.reporter import report
|
||||
from deepspeech.training.timer import Timer
|
||||
from deepspeech.training.updaters.standard_updater import StandardUpdater
|
||||
from deepspeech.utils import layer_tools
|
||||
from deepspeech.utils.log import Log
|
||||
|
||||
logger = Log(__name__).getlog()
|
||||
|
||||
|
||||
class U2Evaluator(StandardEvaluator):
    """Evaluator that accumulates an utterance-weighted loss over batches."""

    def __init__(self, model, dataloader):
        super().__init__(model, dataloader)
        # last log line produced by `evaluate_core`
        self.msg = ""
        # running totals over all batches with a finite loss
        self.num_seen_utts = 0
        self.total_loss = 0.0

    def evaluate_core(self, batch):
        """Evaluate one batch and update the running totals.

        Args:
            batch: sequence whose first item is skipped; the remaining items
                are passed to the model positionally.

        Returns:
            tuple: ``(self.total_loss, self.num_seen_utts)`` after this batch.
        """
        rank = dist.get_rank()
        self.msg = f"Valid: Rank: {rank}, "
        metrics = {}

        loss, attention_loss, ctc_loss = self.model(*batch[1:])
        # Batches with a non-finite loss contribute nothing.
        if paddle.isfinite(loss):
            utt_count = batch[1].shape[0]
            loss_value = float(loss)
            self.num_seen_utts += utt_count
            self.total_loss += loss_value * utt_count

            metrics['loss'] = loss_value
            optional_losses = (('att_loss', attention_loss),
                               ('ctc_loss', ctc_loss))
            for name, value in optional_losses:
                if value:
                    metrics[name] = float(value)

            for name, value in metrics.items():
                report("eval/" + name, value)

        formatted = ['{}: {:>.6f}'.format(name, value)
                     for name, value in metrics.items()]
        self.msg += ', '.join(formatted)
        logger.info(self.msg)
        return self.total_loss, self.num_seen_utts
|
||||
|
||||
|
||||
class U2Updater(StandardUpdater):
    """Updater for the U2 model with gradient accumulation.

    Args:
        model: network called as ``model(*batch[1:])``; it must return
            ``(loss, attention_loss, ctc_loss)``.
        optimizer: optimizer stepped once every ``accum_grad`` batches.
        scheduler: LR scheduler stepped together with the optimizer.
        dataloader: source of training batches.
        init_state: forwarded to ``StandardUpdater``.
        accum_grad (int): number of batches whose gradients are accumulated
            before one parameter update.
        **kwargs: accepted for call-site compatibility; not used here.
    """

    def __init__(self,
                 model,
                 optimizer,
                 scheduler,
                 dataloader,
                 init_state=None,
                 accum_grad=1,
                 **kwargs):
        super().__init__(
            model, optimizer, scheduler, dataloader, init_state=init_state)
        self.accum_grad = accum_grad
        # number of forward/backward passes since the last parameter update
        self.forward_count = 0
        self.msg = ""

    def update_core(self, batch):
        """One Step

        Runs forward and backward on ``batch``; the optimizer and scheduler
        are stepped only on every ``accum_grad``-th call.

        Args:
            batch (List[Object]): utts, xs, xlens, ys, ylens
        """
        losses_dict = {}
        self.msg = "Rank: {}, ".format(dist.get_rank())

        # forward
        batch_size = batch[1].shape[0]
        loss, attention_loss, ctc_loss = self.model(*batch[1:])
        # Scale by the number of accumulated passes.
        # NOTE(review): presumably the model's loss is already normalized
        # over the batch - confirm.
        loss /= self.accum_grad

        # loss backward
        if (self.forward_count + 1) != self.accum_grad:
            # Disable gradient synchronizations across DDP processes.
            # Within this context, gradients will be accumulated on module
            # variables, which will later be synchronized.
            # A model that is not wrapped for data-parallel training has no
            # `no_sync`; fall back to a no-op context instead of raising
            # AttributeError.
            context = getattr(self.model, "no_sync", nullcontext)
        else:
            # Used for single gpu training and DDP gradient synchronization
            # processes.
            context = nullcontext

        with context():
            loss.backward()
            layer_tools.print_grads(self.model, print_func=None)

        # loss info (undo the accumulation scaling for reporting)
        losses_dict['loss'] = float(loss) * self.accum_grad
        if attention_loss:
            losses_dict['att_loss'] = float(attention_loss)
        if ctc_loss:
            losses_dict['ctc_loss'] = float(ctc_loss)
        # report loss
        for k, v in losses_dict.items():
            report("train/" + k, v)
        # loss msg
        self.msg += "batch size: {}, ".format(batch_size)
        self.msg += "accum: {}, ".format(self.accum_grad)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())

        # Truncate the graph
        # NOTE(review): the return value is discarded, so this call looks
        # like a no-op - confirm whether `loss = loss.detach()` was intended.
        loss.detach()

        # update parameters
        self.forward_count += 1
        if self.forward_count != self.accum_grad:
            return
        self.forward_count = 0

        self.optimizer.step()
        self.optimizer.clear_grad()
        self.scheduler.step()

    def update(self):
        """Read one batch, run ``update_core`` and advance the training state.

        ``state.iteration`` only advances when a parameter update happened
        (``forward_count`` was reset to 0); ``state.epoch`` advances every
        ``updates_per_epoch`` iterations when that value is set.
        """
        # model is default in train mode

        # training for a step is implemented here
        with Timer("data time cost:{}"):
            batch = self.read_batch()
        with Timer("step time cost:{}"):
            self.update_core(batch)

        # #iterations with accum_grad > 1
        # Ref.: https://github.com/espnet/espnet/issues/777
        if self.forward_count == 0:
            self.state.iteration += 1
            if self.updates_per_epoch is not None:
                if self.state.iteration % self.updates_per_epoch == 0:
                    self.state.epoch += 1
|
@ -0,0 +1,50 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import datetime
|
||||
import time
|
||||
|
||||
from deepspeech.utils.log import Log
|
||||
|
||||
__all__ = ["Timer"]
|
||||
|
||||
logger = Log(__name__).getlog()
|
||||
|
||||
|
||||
class Timer():
    """Context manager measuring the wall-clock time spent in a ``with`` body.

    Example:
        with Timer("cost: {}") as timer:
            do some thing

    When a message is given, it is formatted with the elapsed time and
    logged on exit. The instance can also be called to get the elapsed
    seconds since the block was entered.
    """

    def __init__(self, message=None):
        # format string with one `{}` placeholder, or None to stay silent
        self.message = message

    def duration(self) -> str:
        """Return the time elapsed since ``__enter__`` as a ``timedelta`` string."""
        delta = datetime.timedelta(seconds=time.time() - self.start)
        return str(delta)

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, type, value, traceback):
        # Nothing to report without a message template.
        if not self.message:
            return
        text = self.message.format(self.duration())
        logger.info(text)

    def __call__(self) -> float:
        """Return the seconds elapsed since ``__enter__``."""
        now = time.time()
        return now - self.start

    def __str__(self):
        return self.duration()
|
Before Width: | Height: | Size: 206 KiB |
Before Width: | Height: | Size: 108 KiB |
@ -1,16 +0,0 @@
|
||||
# Benchmarks
|
||||
|
||||
## Acceleration with Multi-GPUs
|
||||
|
||||
We compare the training time with 1, 2, 4, and 8 Tesla V100 GPUs (using a subset of LibriSpeech samples whose audio durations are between 6.0 and 7.0 seconds). The results show that **near-linear** acceleration with multiple GPUs has been achieved. In the following figure, the training time (in seconds) is printed on the blue bars.
|
||||
|
||||
<img src="../images/multi_gpu_speedup.png" width=450>
|
||||
|
||||
| # of GPU | Acceleration Rate |
|
||||
| -------- | --------------: |
|
||||
| 1 | 1.00 X |
|
||||
| 2 | 1.98 X |
|
||||
| 4 | 3.73 X |
|
||||
| 8 | 6.95 X |
|
||||
|
||||
`utils/profile.sh` provides such a demo profiling tool; you can change it as needed.
|
Before Width: | Height: | Size: 93 KiB After Width: | Height: | Size: 93 KiB |
Before Width: | Height: | Size: 93 KiB After Width: | Height: | Size: 93 KiB |
@ -1,20 +0,0 @@
|
||||
#!/bin/bash
# Launch deploy/client.py against localhost:8086.
# NOTE(review): BIN_DIR is presumably exported by path.sh - confirm.

source path.sh

# run on MacOS
# brew install portaudio
# pip install pyaudio
# pip install keyboard

# start demo client
# (the last argument no longer ends with a dangling line continuation)
python3 -u "${BIN_DIR}/deploy/client.py" \
    --host_ip="localhost" \
    --host_port=8086

if [ $? -ne 0 ]; then
    echo "Failed in starting demo client!"
    exit 1
fi

exit 0
|
@ -1,40 +0,0 @@
|
||||
#!/bin/bash
# TODO: replace the model with a mandarin model

# Launch deploy/server.py on localhost:8086 with the given checkpoint.
# Usage: <this script> checkpoint_path

if [[ $# != 1 ]]; then
    # $0 is the script name; $1 would print the (missing) first argument.
    echo "usage: $0 checkpoint_path"
    exit 1
fi

source path.sh

# download language model
bash local/download_lm_ch.sh
if [ $? -ne 0 ]; then
    exit 1
fi

# download well-trained model
#bash local/download_model.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

# start demo server
CUDA_VISIBLE_DEVICES=0 \
python3 -u "${BIN_DIR}/deploy/server.py" \
    --device 'gpu' \
    --nproc 1 \
    --config conf/deepspeech2.yaml \
    --host_ip="localhost" \
    --host_port=8086 \
    --speech_save_dir="demo_cache" \
    --checkpoint_path "${1}"

if [ $? -ne 0 ]; then
    echo "Failed in starting demo server!"
    exit 1
fi


exit 0
|
@ -1,28 +0,0 @@
|
||||
#!/bin/bash
# Grid-search the language-model hyper-parameters (alpha/beta) with tune.py.
# Usage: tune ckpt_path

# Fail early with a usage line instead of passing an empty checkpoint path.
if [ $# != 1 ]; then
    echo "usage: tune ckpt_path"
    exit 1
fi

# grid-search for hyper-parameters in language model
python3 -u "${BIN_DIR}/tune.py" \
    --device 'gpu' \
    --nproc 1 \
    --config conf/deepspeech2.yaml \
    --num_batches=10 \
    --batch_size=128 \
    --beam_size=300 \
    --num_proc_bsearch=8 \
    --num_alphas=10 \
    --num_betas=10 \
    --alpha_from=0.0 \
    --alpha_to=5.0 \
    --beta_from=-6 \
    --beta_to=6 \
    --cutoff_prob=1.0 \
    --cutoff_top_n=40 \
    --checkpoint_path "${1}"

if [ $? -ne 0 ]; then
    echo "Failed in tuning!"
    exit 1
fi


exit 0
|
@ -0,0 +1,58 @@
|
||||
# [CC-CEDICT](https://cc-cedict.org/wiki/)
|
||||
|
||||
What is CC-CEDICT?
|
||||
CC-CEDICT is a continuation of the CEDICT project.
|
||||
The objective of the CEDICT project was to create an online, downloadable (as opposed to searchable-only) public-domain Chinese-English dictionary.
|
||||
CEDICT was started by Paul Andrew Denisowski in October 1997.
|
||||
For the most part, the project is modeled on Jim Breen's highly successful EDICT (Japanese-English dictionary) project and is intended to be a collaborative effort,
|
||||
with users providing entries and corrections to the main file.
|
||||
|
||||
|
||||
## Parse CC-CEDICT to Json format
|
||||
|
||||
1. Parse to Json
|
||||
|
||||
```
|
||||
run.sh
|
||||
```
|
||||
|
||||
2. Result
|
||||
|
||||
```
|
||||
exp/
|
||||
|-- cedict
|
||||
`-- cedict.json
|
||||
|
||||
0 directories, 2 files
|
||||
```
|
||||
|
||||
```
|
||||
4c4bffc84e24467fe1b2ea9ba37ed6b6 exp/cedict
|
||||
3adf504dacd13886f88cc9fe3b37c75d exp/cedict.json
|
||||
```
|
||||
|
||||
```
|
||||
==> exp/cedict <==
|
||||
# CC-CEDICT
|
||||
# Community maintained free Chinese-English dictionary.
|
||||
#
|
||||
# Published by MDBG
|
||||
#
|
||||
# License:
|
||||
# Creative Commons Attribution-ShareAlike 4.0 International License
|
||||
# https://creativecommons.org/licenses/by-sa/4.0/
|
||||
#
|
||||
# Referenced works:
|
||||
|
||||
==> exp/cedict.json <==
|
||||
{"traditional": "2019\u51a0\u72c0\u75c5\u6bd2\u75c5", "simplified": "2019\u51a0\u72b6\u75c5\u6bd2\u75c5", "pinyin": "er4 ling2 yi1 jiu3 guan1 zhuang4 bing4 du2 bing4", "english": "COVID-19, the coronavirus disease identified in 2019"}
|
||||
{"traditional": "21\u4e09\u9ad4\u7d9c\u5408\u75c7", "simplified": "21\u4e09\u4f53\u7efc\u5408\u75c7", "pinyin": "er4 shi2 yi1 san1 ti3 zong1 he2 zheng4", "english": "trisomy"}
|
||||
{"traditional": "3C", "simplified": "3C", "pinyin": "san1 C", "english": "abbr. for computers, communications, and consumer electronics"}
|
||||
{"traditional": "3P", "simplified": "3P", "pinyin": "san1 P", "english": "(slang) threesome"}
|
||||
{"traditional": "3Q", "simplified": "3Q", "pinyin": "san1 Q", "english": "(Internet slang) thank you (loanword)"}
|
||||
{"traditional": "421", "simplified": "421", "pinyin": "si4 er4 yi1", "english": "four grandparents, two parents and an only child"}
|
||||
{"traditional": "502\u81a0", "simplified": "502\u80f6", "pinyin": "wu3 ling2 er4 jiao1", "english": "cyanoacrylate glue"}
|
||||
{"traditional": "88", "simplified": "88", "pinyin": "ba1 ba1", "english": "(Internet slang) bye-bye (alternative for \u62dc\u62dc[bai2 bai2])"}
|
||||
{"traditional": "996", "simplified": "996", "pinyin": "jiu3 jiu3 liu4", "english": "9am-9pm, six days a week (work schedule)"}
|
||||
{"traditional": "A", "simplified": "A", "pinyin": "A", "english": "(slang) (Tw) to steal"}
|
||||
```
|
@ -1,5 +0,0 @@
|
||||
# Download Baker dataset
|
||||
|
||||
Baker dataset has to be downloaded manually and moved to 'data/', because you will have to pass the CAPTCHA from a browser to download the dataset.
|
||||
|
||||
Download URL https://test.data-baker.com/#/data/index/source.
|
@ -0,0 +1,3 @@
|
||||
# G2P
|
||||
|
||||
* zh - Chinese G2P
|
@ -1,4 +1,4 @@
|
||||
export MAIN_ROOT=`realpath ${PWD}/../../`
|
||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
||||
|
||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
||||
export LC_ALL=C
|
@ -1,10 +1,17 @@
|
||||
# LibriSpeech
|
||||
|
||||
## Data
|
||||
| Data Subset | Duration in Seconds |
|
||||
| --- | --- |
|
||||
| data/manifest.train | 0.83s ~ 29.735s |
|
||||
| data/manifest.dev | 1.065s ~ 35.155s |
|
||||
| data/manifest.test-clean | 1.285s ~ 34.955s |
|
||||
|
||||
## Deepspeech2
|
||||
|
||||
| Model | Params | release | Config | Test set | Loss | WER |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| DeepSpeech2 | 42.96M | 2.2.0 | conf/deepspeech2.yaml + spec_aug | 14.49190807 | test-clean | 0.067283 |
|
||||
| DeepSpeech2 | 42.96M | 2.1.0 | conf/deepspeech2.yaml | 15.184467315673828 | test-clean | 0.072154 |
|
||||
| DeepSpeech2 | 42.96M | 2.0.0 | conf/deepspeech2.yaml | - | test-clean | 0.073973 |
|
||||
| DeepSpeech2 | 42.96M | 2.2.0 | conf/deepspeech2.yaml + spec_aug | test-clean | 14.49190807 | 0.067283 |
|
||||
| DeepSpeech2 | 42.96M | 2.1.0 | conf/deepspeech2.yaml | test-clean | 15.184467315673828 | 0.072154 |
|
||||
| DeepSpeech2 | 42.96M | 2.0.0 | conf/deepspeech2.yaml | test-clean | - | 0.073973 |
|
||||
| DeepSpeech2 | 42.96M | 1.8.5 | - | test-clean | - | 0.074939 |
|
||||
|
@ -1,33 +0,0 @@
|
||||
#!/bin/bash
# Grid-search the language-model hyper-parameters (alpha/beta) with tune.py.
# Usage: tune ckpt_path

if [ $# != 1 ]; then
    echo "usage: tune ckpt_path"
    exit 1
fi

# grid-search for hyper-parameters in language model
# Paths are quoted so that a checkpoint path containing spaces stays one
# argument.
python3 -u "${BIN_DIR}/tune.py" \
    --device 'gpu' \
    --nproc 1 \
    --config conf/deepspeech2.yaml \
    --num_batches=-1 \
    --batch_size=128 \
    --beam_size=500 \
    --num_proc_bsearch=12 \
    --num_alphas=45 \
    --num_betas=8 \
    --alpha_from=1.0 \
    --alpha_to=3.2 \
    --beta_from=0.1 \
    --beta_to=0.45 \
    --cutoff_prob=1.0 \
    --cutoff_top_n=40 \
    --checkpoint_path "${1}"

if [ $? -ne 0 ]; then
    echo "Failed in tuning!"
    exit 1
fi


exit 0
|
@ -0,0 +1,3 @@
|
||||
# Ngram LM
|
||||
|
||||
* s0 - kenlm ngram lm
|
@ -0,0 +1 @@
|
||||
data/lm
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue