From cda6ca8323935038efc51e911253cb12b24c923a Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 16 Sep 2021 12:16:13 +0000 Subject: [PATCH 01/14] add benchmark flags, and logic --- deepspeech/exps/u2/model.py | 3 ++- deepspeech/training/cli.py | 40 ++++++++++++------------------ deepspeech/training/trainer.py | 14 +++++++++++ deepspeech/utils/utility.py | 14 ++++++++++- examples/aishell/s1/local/train.sh | 10 +++++--- examples/tiny/s1/local/train.sh | 40 +++++++++++++++++++----------- tests/benchmark/run_all.sh | 29 +++++++++++++--------- tests/benchmark/run_benchmark.sh | 21 ++++++++-------- 8 files changed, 106 insertions(+), 65 deletions(-) mode change 100644 => 100755 tests/benchmark/run_all.sh mode change 100644 => 100755 tests/benchmark/run_benchmark.sh diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index 1328a1cb7..0d17d9fd2 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -100,7 +100,8 @@ class U2Trainer(Trainer): # Disable gradient synchronizations across DDP processes. # Within this context, gradients will be accumulated on module # variables, which will later be synchronized. - context = self.model.no_sync + # When using cpu w/o DDP, model does not have `no_sync` + context = self.model.no_sync if self.parallel else nullcontext else: # Used for single gpu training and DDP gradient synchronization # processes. diff --git a/deepspeech/training/cli.py b/deepspeech/training/cli.py index 1477bdfe0..d8719b3ab 100644 --- a/deepspeech/training/cli.py +++ b/deepspeech/training/cli.py @@ -44,32 +44,24 @@ def default_argument_parser(): parser = argparse.ArgumentParser() # yapf: disable - # data and output - parser.add_argument("--config", metavar="FILE", help="path of the config file to overwrite to default config with.") - parser.add_argument("--dump-config", metavar="FILE", help="dump config to yaml file.") - parser.add_argument("--output", metavar="OUTPUT_DIR", help="path to save checkpoint and logs.") - - # load from saved checkpoint - parser.add_argument("--checkpoint_path", type=str, help="path of the checkpoint to load") - - # running - parser.add_argument("--device", type=str, default='gpu', choices=["cpu", "gpu"], - help="device type to use, cpu and gpu are supported.") - parser.add_argument("--nprocs", type=int, default=1, help="number of parallel processes to use.") - - # overwrite extra config and default config - # parser.add_argument("--opts", nargs=argparse.REMAINDER, - # help="options to overwrite --config file and the default config, passing in KEY VALUE pairs") - parser.add_argument("--opts", type=str, default=[], nargs='+', - help="options to overwrite --config file and the default config, passing in KEY VALUE pairs") - - # random seed - parser.add_argument("--seed", type=int, default=None, + train_group = parser.add_argument_group(title='Train Options', description=None) + train_group.add_argument("--seed", type=int, default=None, help="seed to use for paddle, np and random. None or 0 for random, else set seed.") - - # profiler - parser.add_argument('--profiler_options', type=str, default=None, + train_group.add_argument("--device", type=str, default='gpu', choices=["cpu", "gpu"], + help="device cpu and gpu are supported.") + train_group.add_argument("--nprocs", type=int, default=1, help="number of parallel processes. 
0 for cpu.") + train_group.add_argument("--config", metavar="CONFIG_FILE", help="config file.") + train_group.add_argument("--output", metavar="CKPT_DIR", help="path to save checkpoint.") + train_group.add_argument("--checkpoint_path", type=str, help="path to load checkpoint") + train_group.add_argument("--opts", type=str, default=[], nargs='+', + help="overwrite --config file, passing in LIST[KEY VALUE] pairs") + train_group.add_argument("--dump-config", metavar="FILE", help="dump config to `this` file.") + + bech_group = parser.add_argument_group(title='Benchmark Options', description=None) + bech_group.add_argument('--profiler-options', type=str, default=None, help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".') + bech_group.add_argument('--benchmark-batch-size', type=int, default=None, help='batch size for benchmark.') + bech_group.add_argument('--benchmark-max-step', type=int, default=None, help='max iteration for benchmark.') # yapd: enable return parser diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py index 6587f1290..9549a4dd0 100644 --- a/deepspeech/training/trainer.py +++ b/deepspeech/training/trainer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import sys import time from pathlib import Path @@ -24,6 +25,7 @@ from deepspeech.utils import profiler from deepspeech.utils.checkpoint import Checkpoint from deepspeech.utils.log import Log from deepspeech.utils.utility import seed_all +from deepspeech.utils.utility import UpdateConfig __all__ = ["Trainer"] @@ -101,6 +103,12 @@ class Trainer(): seed_all(args.seed) logger.info(f"Set seed {args.seed}") + if self.args.benchmark_batch_size: + with UpdateConfig(self.config): + self.config.collator.batch_size = self.args.benchmark_batch_size + logger.info( + f"Benchmark reset batch-size: {self.args.benchmark_batch_size}") + def setup(self): """Setup the experiment. 
""" @@ -188,6 +196,12 @@ class Trainer(): if self.args.profiler_options: profiler.add_profiler_step(self.args.profiler_options) + if self.args.benchmark_max_step and self.iteration > self.args.benchmark_max_step: + logger.info( + f"Reach benchmark-max-step: {self.args.benchmark_max_step}") + sys.exit( + f"Reach benchmark-max-step: {self.args.benchmark_max_step}") + def train(self): """The training process control by epoch.""" from_scratch = self.resume_or_scratch() diff --git a/deepspeech/utils/utility.py b/deepspeech/utils/utility.py index e18fc1f77..6f84c41be 100644 --- a/deepspeech/utils/utility.py +++ b/deepspeech/utils/utility.py @@ -16,15 +16,27 @@ import distutils.util import math import os import random +from contextlib import contextmanager from typing import List import numpy as np import paddle -__all__ = ["seed_all", 'print_arguments', 'add_arguments', "log_add"] +__all__ = [ + "UpdateConfig", "seed_all", 'print_arguments', 'add_arguments', "log_add" +] + + +@contextmanager +def UpdateConfig(config): + """Update yacs config""" + config.defrost() + yield + config.freeze() def seed_all(seed: int=210329): + """freeze random generator seed.""" np.random.seed(seed) random.seed(seed) paddle.seed(seed) diff --git a/examples/aishell/s1/local/train.sh b/examples/aishell/s1/local/train.sh index e065ad6a8..5b9c45f50 100755 --- a/examples/aishell/s1/local/train.sh +++ b/examples/aishell/s1/local/train.sh @@ -1,7 +1,8 @@ #!/bin/bash - profiler_options= +benchmark_batch_size= +benchmark_max_step= # seed may break model convergence seed=0 @@ -32,12 +33,15 @@ ckpt_name=$2 mkdir -p exp python3 -u ${BIN_DIR}/train.py \ +--seed ${seed} \ --device ${device} \ --nproc ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ ---profiler_options ${profiler_options} \ ---seed ${seed} +--profiler-options "${profiler-options}" \ +--benchmark-batch-size ${benchmark_batch_size} \ +--benchmark-max-step ${benchmark_max_step} + if [ ${seed} != 0 ]; then unset FLAGS_cudnn_deterministic diff --git a/examples/tiny/s1/local/train.sh b/examples/tiny/s1/local/train.sh index 374608fd1..56ceab41c 100755 --- a/examples/tiny/s1/local/train.sh +++ b/examples/tiny/s1/local/train.sh @@ -1,37 +1,49 @@ #!/bin/bash -if [ $# != 2 ];then - echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name" - exit -1 -fi +profiler_options= +benchmark_batch_size= +benchmark_max_step= + +# seed may break model convergence +seed=0 + +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') echo "using $ngpu gpus..." -config_path=$1 -ckpt_name=$2 - device=gpu if [ ${ngpu} == 0 ];then device=cpu fi -mkdir -p exp - -# seed may break model convergence -seed=0 -if [ ${seed} != 0 ]; then +if [ ${seed} != 0 ]; then export FLAGS_cudnn_deterministic=True + echo "using seed $seed & FLAGS_cudnn_deterministic=True ..." 
+fi + +if [ $# != 2 ];then + echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name" + exit -1 fi +config_path=$1 +ckpt_name=$2 + +mkdir -p exp + python3 -u ${BIN_DIR}/train.py \ +--seed ${seed} \ --device ${device} \ --nproc ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ ---seed ${seed} +--profiler-options "${profiler_options}" \ +--benchmark-batch-size ${benchmark_batch_size} \ +--benchmark-max-step ${benchmark_max_step} + -if [ ${seed} != 0 ]; then +if [ ${seed} != 0 ]; then unset FLAGS_cudnn_deterministic fi diff --git a/tests/benchmark/run_all.sh b/tests/benchmark/run_all.sh old mode 100644 new mode 100755 index 7aa11d0f2..6f707cdcb --- a/tests/benchmark/run_all.sh +++ b/tests/benchmark/run_all.sh @@ -1,41 +1,46 @@ #!/bin/bash +CUR_DIR=${PWD} ROOT_DIR=../../ # 提供可稳定复现性能的脚本,默认在标准docker环境内py37执行: # collect env info bash ${ROOT_DIR}/utils/pd_env_collect.sh -cat pd_env.txt +#cat pd_env.txt -# 执行目录:需说明 -pushd ${ROOT_DIR}/examples/aishell/s1 # 1 安装该模型需要的依赖 (如需开启优化策略请注明) -pushd ${ROOT_DIR}/tools; make; popd -source ${ROOT_DIR}/tools/venv/bin/activate -pushd ${ROOT_DIR}; bash setup.sh; popd +#pushd ${ROOT_DIR}/tools; make; popd +#source ${ROOT_DIR}/tools/venv/bin/activate +#pushd ${ROOT_DIR}; bash setup.sh; popd # 2 拷贝该模型需要数据、预训练模型 + +# 执行目录:需说明 +#pushd ${ROOT_DIR}/examples/aishell/s1 +pushd ${ROOT_DIR}/examples/tiny/s1 + mkdir -p exp/log -loca/data.sh &> exp/log/data.log +. path.sh +#bash local/data.sh &> exp/log/data.log # 3 批量运行(如不方便批量,1,2需放到单个模型中) -model_mode_list=(conformer) +model_mode_list=(conformer transformer) fp_item_list=(fp32) -bs_item=(32 64 96) +bs_item_list=(32 64 96) for model_mode in ${model_mode_list[@]}; do for fp_item in ${fp_item_list[@]}; do - for bs_item in ${bs_list[@]} + for bs_item in ${bs_item_list[@]} do echo "index is speed, 1gpus, begin, ${model_name}" run_mode=sp - CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} # (5min) + CUDA_VISIBLE_DEVICES=0 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} # (5min) sleep 60 echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}" run_mode=mp - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} sleep 60 done done diff --git a/tests/benchmark/run_benchmark.sh b/tests/benchmark/run_benchmark.sh old mode 100644 new mode 100755 index 625d36160..eb1117936 --- a/tests/benchmark/run_benchmark.sh +++ b/tests/benchmark/run_benchmark.sh @@ -23,19 +23,19 @@ function _train(){ echo "Train on ${num_gpu_devices} GPUs" echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size" - train_cmd="--model_name=${model_name} - --batch_size=${batch_size} - --fp=${fp_item} \ - --max_iter=${max_iter} " + train_cmd="--benchmark-batch-size ${batch_size} + --benchmark-max-step ${max_iter} + conf/${model_name}.yaml ${model_name}" + case ${run_mode} in - sp) train_cmd="python -u tools/train.py "${train_cmd}" ;; + sp) train_cmd="bash local/train.sh "${train_cmd}"" ;; mp) - train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py "${train_cmd}" - log_parse_file="mylog/workerlog.0" ;; + train_cmd="bash local/train.sh "${train_cmd}"" ;; *) echo "choose run_mode(sp or mp)"; exit 1; esac -# 以下不用修改 - timeout 15m ${train_cmd} > ${log_file} 2>&1 + + # 以下不用修改 + 
CUDA_VISIBLE_DEVICES=${device} timeout 15m ${train_cmd} > ${log_file} 2>&1 if [ $? -ne 0 ];then echo -e "${model_name}, FAIL" export job_fail_flag=1 @@ -43,7 +43,8 @@ function _train(){ echo -e "${model_name}, SUCCESS" export job_fail_flag=0 fi - kill -9 `ps -ef|grep 'python'|awk '{print $2}'` + + trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM if [ $run_mode = "mp" -a -d mylog ]; then rm ${log_file} From d7a33b9d5e755ad43ebef2f4d257c3832de3d823 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 16 Sep 2021 12:35:41 +0000 Subject: [PATCH 02/14] update config with contextlib --- deepspeech/exps/deepspeech2/bin/tune.py | 191 ------------------------ deepspeech/exps/deepspeech2/model.py | 8 +- deepspeech/exps/u2/model.py | 10 +- deepspeech/exps/u2/trainer.py | 9 +- deepspeech/exps/u2_kaldi/model.py | 9 +- deepspeech/exps/u2_st/model.py | 9 +- deepspeech/models/u2/u2.py | 9 +- deepspeech/models/u2_st.py | 9 +- 8 files changed, 35 insertions(+), 219 deletions(-) delete mode 100644 deepspeech/exps/deepspeech2/bin/tune.py diff --git a/deepspeech/exps/deepspeech2/bin/tune.py b/deepspeech/exps/deepspeech2/bin/tune.py deleted file mode 100644 index 94a9b6c47..000000000 --- a/deepspeech/exps/deepspeech2/bin/tune.py +++ /dev/null @@ -1,191 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Beam search parameters tuning for DeepSpeech2 model.""" -import functools -import sys - -import numpy as np -from paddle.io import DataLoader - -from deepspeech.exps.deepspeech2.config import get_cfg_defaults -from deepspeech.io.collator import SpeechCollator -from deepspeech.io.dataset import ManifestDataset -from deepspeech.models.ds2 import DeepSpeech2Model -from deepspeech.training.cli import default_argument_parser -from deepspeech.utils import error_rate -from deepspeech.utils.utility import add_arguments -from deepspeech.utils.utility import print_arguments - - -def tune(config, args): - """Tune parameters alpha and beta incrementally.""" - if not args.num_alphas >= 0: - raise ValueError("num_alphas must be non-negative!") - if not args.num_betas >= 0: - raise ValueError("num_betas must be non-negative!") - config.defrost() - config.data.manfiest = config.data.dev_manifest - config.data.augmentation_config = "" - config.data.keep_transcription_text = True - dev_dataset = ManifestDataset.from_config(config) - - valid_loader = DataLoader( - dev_dataset, - batch_size=config.data.batch_size, - shuffle=False, - drop_last=False, - collate_fn=SpeechCollator(keep_transcription_text=True)) - - model = DeepSpeech2Model.from_pretrained(valid_loader, config, - args.checkpoint_path) - model.eval() - - # decoders only accept string encoded in utf-8 - vocab_list = valid_loader.dataset.vocab_list - errors_func = error_rate.char_errors if config.decoding.error_rate_type == 'cer' else error_rate.word_errors - - # create grid for search - cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas) - cand_betas = np.linspace(args.beta_from, args.beta_to, args.num_betas) - params_grid = [(alpha, beta) for alpha in cand_alphas - for beta in cand_betas] - - err_sum = [0.0 for i in range(len(params_grid))] - err_ave = [0.0 for i in range(len(params_grid))] - - num_ins, len_refs, cur_batch = 0, 0, 0 - # initialize external scorer - model.decoder.init_decode(args.alpha_from, args.beta_from, - config.decoding.lang_model_path, vocab_list, - config.decoding.decoding_method) - ## incremental tuning parameters over multiple batches - print("start tuning ...") - for infer_data in valid_loader(): - if (args.num_batches >= 0) and (cur_batch >= args.num_batches): - break - - def ordid2token(texts, texts_len): - """ ord() id to chr() chr """ - trans = [] - for text, n in zip(texts, texts_len): - n = n.numpy().item() - ids = text[:n] - trans.append(''.join([chr(i) for i in ids])) - return trans - - audio, audio_len, text, text_len = infer_data - target_transcripts = ordid2token(text, text_len) - num_ins += audio.shape[0] - - # model infer - eouts, eouts_len = model.encoder(audio, audio_len) - probs = model.decoder.softmax(eouts) - - # grid search - for index, (alpha, beta) in enumerate(params_grid): - print(f"tuneing: alpha={alpha} beta={beta}") - result_transcripts = model.decoder.decode_probs( - probs.numpy(), eouts_len, vocab_list, - config.decoding.decoding_method, - config.decoding.lang_model_path, alpha, beta, - config.decoding.beam_size, config.decoding.cutoff_prob, - config.decoding.cutoff_top_n, config.decoding.num_proc_bsearch) - - for target, result in zip(target_transcripts, result_transcripts): - errors, len_ref = errors_func(target, result) - err_sum[index] += errors - - # accumulate the length of references of every batchπ - # in the first iteration - if args.alpha_from == alpha and args.beta_from == beta: - len_refs += len_ref - - err_ave[index] = err_sum[index] / len_refs - if index 
% 2 == 0: - sys.stdout.write('.') - sys.stdout.flush() - print("tuneing: one grid done!") - - # output on-line tuning result at the end of current batch - err_ave_min = min(err_ave) - min_index = err_ave.index(err_ave_min) - print("\nBatch %d [%d/?], current opt (alpha, beta) = (%s, %s), " - " min [%s] = %f" % - (cur_batch, num_ins, "%.3f" % params_grid[min_index][0], - "%.3f" % params_grid[min_index][1], - config.decoding.error_rate_type, err_ave_min)) - cur_batch += 1 - - # output WER/CER at every (alpha, beta) - print("\nFinal %s:\n" % config.decoding.error_rate_type) - for index in range(len(params_grid)): - print("(alpha, beta) = (%s, %s), [%s] = %f" % - ("%.3f" % params_grid[index][0], "%.3f" % params_grid[index][1], - config.decoding.error_rate_type, err_ave[index])) - - err_ave_min = min(err_ave) - min_index = err_ave.index(err_ave_min) - print("\nFinish tuning on %d batches, final opt (alpha, beta) = (%s, %s)" % - (cur_batch, "%.3f" % params_grid[min_index][0], - "%.3f" % params_grid[min_index][1])) - - print("finish tuning") - - -def main(config, args): - tune(config, args) - - -if __name__ == "__main__": - parser = default_argument_parser() - add_arg = functools.partial(add_arguments, argparser=parser) - add_arg('num_batches', int, -1, "# of batches tuning on. " - "Default -1, on whole dev set.") - add_arg('num_alphas', int, 45, "# of alpha candidates for tuning.") - add_arg('num_betas', int, 8, "# of beta candidates for tuning.") - add_arg('alpha_from', float, 1.0, "Where alpha starts tuning from.") - add_arg('alpha_to', float, 3.2, "Where alpha ends tuning with.") - add_arg('beta_from', float, 0.1, "Where beta starts tuning from.") - add_arg('beta_to', float, 0.45, "Where beta ends tuning with.") - - add_arg('batch_size', int, 256, "# of samples per batch.") - add_arg('beam_size', int, 500, "Beam search width.") - add_arg('num_proc_bsearch', int, 8, "# of CPUs for beam search.") - add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") - add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") - - args = parser.parse_args() - print_arguments(args, globals()) - - # https://yaml.org/type/float.html - config = get_cfg_defaults() - if args.config: - config.merge_from_file(args.config) - if args.opts: - config.merge_from_list(args.opts) - - config.data.batch_size = args.batch_size - config.decoding.beam_size = args.beam_size - config.decoding.num_proc_bsearch = args.num_proc_bsearch - config.decoding.cutoff_prob = args.cutoff_prob - config.decoding.cutoff_top_n = args.cutoff_top_n - - config.freeze() - print(config) - - if args.dump_config: - with open(args.dump_config, 'w') as f: - print(config, file=f) - - main(config, args) diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index fbc357ca0..df35c52c6 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -41,6 +41,7 @@ from deepspeech.utils import layer_tools from deepspeech.utils import mp_tools from deepspeech.utils.log import Autolog from deepspeech.utils.log import Log +from deepspeech.utils.utility import UpdateConfig logger = Log(__name__).getlog() @@ -147,10 +148,9 @@ class DeepSpeech2Trainer(Trainer): def setup_model(self): config = self.config.clone() - config.defrost() - config.model.feat_size = self.train_loader.collate_fn.feature_size - config.model.dict_size = self.train_loader.collate_fn.vocab_size - config.freeze() + with UpdateConfig(config): + config.model.feat_size = self.train_loader.collate_fn.feature_size + 
config.model.dict_size = self.train_loader.collate_fn.vocab_size if self.args.model_type == 'offline': model = DeepSpeech2Model.from_config(config.model) diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index 0d17d9fd2..89d443e03 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -43,6 +43,7 @@ from deepspeech.utils import mp_tools from deepspeech.utils import text_grid from deepspeech.utils import utility from deepspeech.utils.log import Log +from deepspeech.utils.utility import UpdateConfig logger = Log(__name__).getlog() @@ -315,10 +316,11 @@ class U2Trainer(Trainer): def setup_model(self): config = self.config model_conf = config.model - model_conf.defrost() - model_conf.input_dim = self.train_loader.collate_fn.feature_size - model_conf.output_dim = self.train_loader.collate_fn.vocab_size - model_conf.freeze() + + with UpdateConfig(model_conf): + model_conf.input_dim = self.train_loader.collate_fn.feature_size + model_conf.output_dim = self.train_loader.collate_fn.vocab_size + model = U2Model.from_config(model_conf) if self.parallel: diff --git a/deepspeech/exps/u2/trainer.py b/deepspeech/exps/u2/trainer.py index fa3e6d9d7..8e8634ac3 100644 --- a/deepspeech/exps/u2/trainer.py +++ b/deepspeech/exps/u2/trainer.py @@ -32,6 +32,7 @@ from deepspeech.training.trainer import Trainer from deepspeech.training.updaters.trainer import Trainer as NewTrainer from deepspeech.utils import layer_tools from deepspeech.utils.log import Log +from deepspeech.utils.utility import UpdateConfig logger = Log(__name__).getlog() @@ -121,10 +122,10 @@ class U2Trainer(Trainer): def setup_model(self): config = self.config model_conf = config.model - model_conf.defrost() - model_conf.input_dim = self.train_loader.collate_fn.feature_size - model_conf.output_dim = self.train_loader.collate_fn.vocab_size - model_conf.freeze() + with UpdateConfig(model_conf): + model_conf.input_dim = self.train_loader.collate_fn.feature_size + model_conf.output_dim = self.train_loader.collate_fn.vocab_size + model = U2Model.from_config(model_conf) if self.parallel: diff --git a/deepspeech/exps/u2_kaldi/model.py b/deepspeech/exps/u2_kaldi/model.py index 3d15e0259..edcc34012 100644 --- a/deepspeech/exps/u2_kaldi/model.py +++ b/deepspeech/exps/u2_kaldi/model.py @@ -41,6 +41,7 @@ from deepspeech.utils import mp_tools from deepspeech.utils import text_grid from deepspeech.utils import utility from deepspeech.utils.log import Log +from deepspeech.utils.utility import UpdateConfig logger = Log(__name__).getlog() @@ -319,10 +320,10 @@ class U2Trainer(Trainer): # model model_conf = config.model - model_conf.defrost() - model_conf.input_dim = self.train_loader.feat_dim - model_conf.output_dim = self.train_loader.vocab_size - model_conf.freeze() + with UpdateConfig(model_conf): + model_conf.input_dim = self.train_loader.feat_dim + model_conf.output_dim = self.train_loader.vocab_size + model = U2Model.from_config(model_conf) if self.parallel: model = paddle.DataParallel(model) diff --git a/deepspeech/exps/u2_st/model.py b/deepspeech/exps/u2_st/model.py index 91a81503f..0fa8ed735 100644 --- a/deepspeech/exps/u2_st/model.py +++ b/deepspeech/exps/u2_st/model.py @@ -47,6 +47,7 @@ from deepspeech.utils import mp_tools from deepspeech.utils import text_grid from deepspeech.utils import utility from deepspeech.utils.log import Log +from deepspeech.utils.utility import UpdateConfig logger = Log(__name__).getlog() @@ -345,10 +346,10 @@ class U2STTrainer(Trainer): def setup_model(self): config = self.config 
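         # the `with UpdateConfig(...)` below (deepspeech/utils/utility.py) defrosts the yacs config on entry and re-freezes it on exit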
model_conf = config.model - model_conf.defrost() - model_conf.input_dim = self.train_loader.collate_fn.feature_size - model_conf.output_dim = self.train_loader.collate_fn.vocab_size - model_conf.freeze() + with UpdateConfig(model_conf): + model_conf.input_dim = self.train_loader.collate_fn.feature_size + model_conf.output_dim = self.train_loader.collate_fn.vocab_size + model = U2STModel.from_config(model_conf) if self.parallel: diff --git a/deepspeech/models/u2/u2.py b/deepspeech/models/u2/u2.py index fd8f15471..39ed9d5d1 100644 --- a/deepspeech/models/u2/u2.py +++ b/deepspeech/models/u2/u2.py @@ -48,6 +48,7 @@ from deepspeech.utils.tensor_utils import add_sos_eos from deepspeech.utils.tensor_utils import pad_sequence from deepspeech.utils.tensor_utils import th_accuracy from deepspeech.utils.utility import log_add +from deepspeech.utils.utility import UpdateConfig __all__ = ["U2Model", "U2InferModel"] @@ -903,10 +904,10 @@ class U2Model(U2BaseModel): Returns: DeepSpeech2Model: The model built from pretrained result. """ - config.defrost() - config.input_dim = dataloader.collate_fn.feature_size - config.output_dim = dataloader.collate_fn.vocab_size - config.freeze() + with UpdateConfig(config): + config.input_dim = dataloader.collate_fn.feature_size + config.output_dim = dataloader.collate_fn.vocab_size + model = cls.from_config(config) if checkpoint_path: diff --git a/deepspeech/models/u2_st.py b/deepspeech/models/u2_st.py index 6737a549d..87ca68b29 100644 --- a/deepspeech/models/u2_st.py +++ b/deepspeech/models/u2_st.py @@ -42,6 +42,7 @@ from deepspeech.utils import layer_tools from deepspeech.utils.log import Log from deepspeech.utils.tensor_utils import add_sos_eos from deepspeech.utils.tensor_utils import th_accuracy +from deepspeech.utils.utility import UpdateConfig __all__ = ["U2STModel", "U2STInferModel"] @@ -686,10 +687,10 @@ class U2STModel(U2STBaseModel): Returns: DeepSpeech2Model: The model built from pretrained result. """ - config.defrost() - config.input_dim = dataloader.collate_fn.feature_size - config.output_dim = dataloader.collate_fn.vocab_size - config.freeze() + with UpdateConfig(config): + config.input_dim = dataloader.collate_fn.feature_size + config.output_dim = dataloader.collate_fn.vocab_size + model = cls.from_config(config) if checkpoint_path: From f15e1ff7325b6c0e94145bf4530b0601a2575413 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 16 Sep 2021 12:50:04 +0000 Subject: [PATCH 03/14] fix doc link --- README.md | 20 +++++++++--------- README_cn.md | 49 --------------------------------------------- docs/src/install.md | 2 +- 3 files changed, 10 insertions(+), 61 deletions(-) delete mode 100644 README_cn.md diff --git a/README.md b/README.md index 931e6331c..71bc63638 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,3 @@ -[中文版](README_cn.md) - # PaddlePaddle Speech to Any toolkit ![License](https://img.shields.io/badge/license-Apache%202-red.svg) @@ -11,7 +9,7 @@ ## Features - See [feature list](doc/src/feature_list.md) for more information. + See [feature list](docs/src/feature_list.md) for more information. ## Setup @@ -20,20 +18,20 @@ All tested under: * python>=3.7 * paddlepaddle>=2.2.0rc -Please see [install](doc/src/install.md). +Please see [install](docs/src/install.md). ## Getting Started -Please see [Getting Started](doc/src/getting_started.md) and [tiny egs](examples/tiny/s0/README.md). +Please see [Getting Started](docs/src/getting_started.md) and [tiny egs](examples/tiny/s0/README.md). 
## More Information -* [Data Prepration](doc/src/data_preparation.md) -* [Data Augmentation](doc/src/augmentation.md) -* [Ngram LM](doc/src/ngram_lm.md) -* [Benchmark](doc/src/benchmark.md) -* [Relased Model](doc/src/released_model.md) +* [Data Prepration](docs/src/data_preparation.md) +* [Data Augmentation](docs/src/augmentation.md) +* [Ngram LM](docs/src/ngram_lm.md) +* [Benchmark](docs/src/benchmark.md) +* [Relased Model](docs/src/released_model.md) ## Questions and Help @@ -47,4 +45,4 @@ DeepSpeech is provided under the [Apache-2.0 License](./LICENSE). ## Acknowledgement -We depends on many open source repos. See [References](doc/src/reference.md) for more information. +We depends on many open source repos. See [References](docs/src/reference.md) for more information. diff --git a/README_cn.md b/README_cn.md deleted file mode 100644 index cc993f8bf..000000000 --- a/README_cn.md +++ /dev/null @@ -1,49 +0,0 @@ -[English](README.md) - -# PaddlePaddle Speech to Any toolkit - -![License](https://img.shields.io/badge/license-Apache%202-red.svg) -![python version](https://img.shields.io/badge/python-3.7+-orange.svg) -![support os](https://img.shields.io/badge/os-linux-yellow.svg) - -*DeepSpeech*是一个采用[PaddlePaddle](https://github.com/PaddlePaddle/Paddle)平台的端到端自动语音识别引擎的开源项目, -我们的愿景是为语音识别在工业应用和学术研究上,提供易于使用、高效、小型化和可扩展的工具,包括训练,推理,以及 部署。 - -## 特性 - - 参看 [特性列表](doc/src/feature_list.md)。 - - -## 安装 - -在以下环境测试验证过: - -* Ubuntu 16.04 -* python>=3.7 -* paddlepaddle>=2.2.0rc - -参看 [安装](doc/src/install.md)。 - -## 开始 - -请查看 [开始](doc/src/getting_started.md) 和 [tiny egs](examples/tiny/s0/README.md)。 - -## 更多信息 - -* [数据处理](doc/src/data_preparation.md) -* [数据增强](doc/src/augmentation.md) -* [语言模型](doc/src/ngram_lm.md) -* [Benchmark](doc/src/benchmark.md) -* [Relased Model](doc/src/released_model.md) - -## 问题和帮助 - -欢迎您在[Github讨论](https://github.com/PaddlePaddle/DeepSpeech/discussions)提交问题,[Github问题](https://github.com/PaddlePaddle/models/issues)中反馈bug。也欢迎您为这个项目做出贡献。 - -## License - -DeepSpeech 遵循[Apache-2.0开源协议](./LICENSE)。 - -## 感谢 - -开发中参考一些优秀的仓库,详情参见 [References](doc/src/reference.md)。 diff --git a/docs/src/install.md b/docs/src/install.md index 79460737b..8cecba125 100644 --- a/docs/src/install.md +++ b/docs/src/install.md @@ -4,7 +4,7 @@ To avoid the trouble of environment setup, [running in Docker container](#runnin ## Prerequisites - Python >= 3.7 -- PaddlePaddle 2.0.0 or later (please refer to the [Installation Guide](https://www.paddlepaddle.org.cn/documentation/docs/en/beginners_guide/index_en.html)) +- PaddlePaddle latest version (please refer to the [Installation Guide](https://www.paddlepaddle.org.cn/documentation/docs/en/beginners_guide/index_en.html)) ## Setup (Important) From 576e94da044c5ee08c2ef417d1646c150f86d329 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 02:49:36 +0000 Subject: [PATCH 04/14] log interval 1 when benchmark --- deepspeech/exps/deepspeech2/model.py | 2 +- deepspeech/training/trainer.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index df35c52c6..128c4c822 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -100,7 +100,7 @@ class DeepSpeech2Trainer(Trainer): iteration_time = time.time() - start - msg += "train time: {:>.3f}s, ".format(iteration_time) + msg += "batch cost: {:>.3f}s, ".format(iteration_time) msg += "batch size: {}, ".format(self.config.collator.batch_size) msg += "accum: {}, ".format(train_conf.accum_grad) 
msg += ', '.join('{}: {:>.6f}'.format(k, v) diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py index 9549a4dd0..f5e5f12a9 100644 --- a/deepspeech/training/trainer.py +++ b/deepspeech/training/trainer.py @@ -106,6 +106,7 @@ class Trainer(): if self.args.benchmark_batch_size: with UpdateConfig(self.config): self.config.collator.batch_size = self.args.benchmark_batch_size + self.config.training.log_interval = 1 logger.info( f"Benchmark reset batch-size: {self.args.benchmark_batch_size}") From 6de20de3f899874b11b4af193cefed2809b9fd9e Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 02:50:55 +0000 Subject: [PATCH 05/14] rename reporter.scope to ObsScope --- deepspeech/training/extensions/evaluator.py | 4 ++-- deepspeech/training/reporter.py | 2 +- deepspeech/training/updaters/trainer.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/deepspeech/training/extensions/evaluator.py b/deepspeech/training/extensions/evaluator.py index d5b359829..5137dbdde 100644 --- a/deepspeech/training/extensions/evaluator.py +++ b/deepspeech/training/extensions/evaluator.py @@ -21,7 +21,7 @@ from paddle.nn import Layer from . import extension from ..reporter import DictSummary from ..reporter import report -from ..reporter import scope +from ..reporter import ObsScope from ..timer import Timer from deepspeech.utils.log import Log logger = Log(__name__).getlog() @@ -78,7 +78,7 @@ class StandardEvaluator(extension.Extension): summary = DictSummary() for batch in self.dataloader: observation = {} - with scope(observation): + with ObsScope(observation): # main evaluation computation here. with paddle.no_grad(): self.evaluate_sync(self.evaluate_core(batch)) diff --git a/deepspeech/training/reporter.py b/deepspeech/training/reporter.py index 66a81adef..7afc33f38 100644 --- a/deepspeech/training/reporter.py +++ b/deepspeech/training/reporter.py @@ -19,7 +19,7 @@ OBSERVATIONS = None @contextlib.contextmanager -def scope(observations): +def ObsScope(observations): # make `observation` the target to report to. 
# it is basically a dictionary that stores temporary observations global OBSERVATIONS diff --git a/deepspeech/training/updaters/trainer.py b/deepspeech/training/updaters/trainer.py index a52fb9eb3..077694659 100644 --- a/deepspeech/training/updaters/trainer.py +++ b/deepspeech/training/updaters/trainer.py @@ -24,7 +24,7 @@ import tqdm from deepspeech.training.extensions.extension import Extension from deepspeech.training.extensions.extension import PRIORITY_READER -from deepspeech.training.reporter import scope +from deepspeech.training.reporter import ObsScope from deepspeech.training.triggers import get_trigger from deepspeech.training.triggers.limit_trigger import LimitTrigger from deepspeech.training.updaters.updater import UpdaterBase @@ -144,7 +144,7 @@ class Trainer(): # you can use `report` freely in Updater.update() # updating parameters and state - with scope(self.observation): + with ObsScope(self.observation): update() p.update() From 0e91d26ae3808497072c36d71d424a4db40cf4f8 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 06:26:08 +0000 Subject: [PATCH 06/14] fix log; add report to trainer --- deepspeech/exps/deepspeech2/model.py | 22 ++++++++------- deepspeech/exps/u2/model.py | 42 ++++++++++++++++++---------- deepspeech/training/trainer.py | 35 +++++++++++++++++------ examples/aishell/s1/local/train.sh | 6 ++-- examples/tiny/s1/local/train.sh | 4 +-- 5 files changed, 70 insertions(+), 39 deletions(-) diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index 128c4c822..8272d72ee 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -36,6 +36,7 @@ from deepspeech.models.ds2_online import DeepSpeech2InferModelOnline from deepspeech.models.ds2_online import DeepSpeech2ModelOnline from deepspeech.training.gradclip import ClipGradByGlobalNormWithLog from deepspeech.training.trainer import Trainer +from deepspeech.training.reporter import report from deepspeech.utils import error_rate from deepspeech.utils import layer_tools from deepspeech.utils import mp_tools @@ -67,7 +68,9 @@ class DeepSpeech2Trainer(Trainer): super().__init__(config, args) def train_batch(self, batch_index, batch_data, msg): - train_conf = self.config.training + batch_size = self.config.collator.batch_size + accum_grad = self.config.training.accum_grad + start = time.time() # forward @@ -78,7 +81,7 @@ class DeepSpeech2Trainer(Trainer): } # loss backward - if (batch_index + 1) % train_conf.accum_grad != 0: + if (batch_index + 1) % accum_grad != 0: # Disable gradient synchronizations across DDP processes. # Within this context, gradients will be accumulated on module # variables, which will later be synchronized. 
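
    The accumulation pattern the comment above describes, as a minimal sketch
    (`model`, `loss`, and a `parallel` flag stand in for the surrounding trainer
    state; `nullcontext` is the single-process fallback introduced in PATCH 01):

        from contextlib import nullcontext

        context = model.no_sync if parallel else nullcontext
        with context():
            loss.backward()  # gradients accumulate locally; the DDP all-reduce is deferred
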
@@ -93,20 +96,19 @@ class DeepSpeech2Trainer(Trainer): layer_tools.print_grads(self.model, print_func=None) # optimizer step - if (batch_index + 1) % train_conf.accum_grad == 0: + if (batch_index + 1) % accum_grad == 0: self.optimizer.step() self.optimizer.clear_grad() self.iteration += 1 iteration_time = time.time() - start - msg += "batch cost: {:>.3f}s, ".format(iteration_time) - msg += "batch size: {}, ".format(self.config.collator.batch_size) - msg += "accum: {}, ".format(train_conf.accum_grad) - msg += ', '.join('{}: {:>.6f}'.format(k, v) - for k, v in losses_np.items()) - logger.info(msg) - + for k, v in losses_np.items(): + report(k, v) + report("batch_size", batch_size) + report("accum", accum_grad) + report("step_cost", iteration_time) + if dist.get_rank() == 0 and self.visualizer: for k, v in losses_np.items(): # `step -1` since we update `step` after optimizer.step(). diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index 89d443e03..68b001ca6 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -17,6 +17,7 @@ import os import sys import time from collections import defaultdict +from collections import OrderedDict from contextlib import nullcontext from pathlib import Path from typing import Optional @@ -36,6 +37,8 @@ from deepspeech.training.optimizer import OptimizerFactory from deepspeech.training.scheduler import LRSchedulerFactory from deepspeech.training.timer import Timer from deepspeech.training.trainer import Trainer +from deepspeech.training.reporter import report +from deepspeech.training.reporter import ObsScope from deepspeech.utils import ctc_utils from deepspeech.utils import error_rate from deepspeech.utils import layer_tools @@ -121,12 +124,11 @@ class U2Trainer(Trainer): iteration_time = time.time() - start if (batch_index + 1) % train_conf.log_interval == 0: - msg += "train time: {:>.3f}s, ".format(iteration_time) - msg += "batch size: {}, ".format(self.config.collator.batch_size) - msg += "accum: {}, ".format(train_conf.accum_grad) - msg += ', '.join('{}: {:>.6f}'.format(k, v) - for k, v in losses_np.items()) - logger.info(msg) + for k, v in losses_np.items(): + report(k, v) + report("batch_size", self.config.collator.batch_size) + report("accum", train_conf.accum_grad) + report("step_cost", iteration_time) if dist.get_rank() == 0 and self.visualizer: losses_np_v = losses_np.copy() @@ -199,15 +201,25 @@ class U2Trainer(Trainer): data_start_time = time.time() for batch_index, batch in enumerate(self.train_loader): dataload_time = time.time() - data_start_time - msg = "Train: Rank: {}, ".format(dist.get_rank()) - msg += "epoch: {}, ".format(self.epoch) - msg += "step: {}, ".format(self.iteration) - msg += "batch : {}/{}, ".format(batch_index + 1, - len(self.train_loader)) - msg += "lr: {:>.8f}, ".format(self.lr_scheduler()) - msg += "data time: {:>.3f}s, ".format(dataload_time) - self.train_batch(batch_index, batch, msg) - self.after_train_batch() + msg = "Train:" + observation = OrderedDict() + with ObsScope(observation): + report("Rank", dist.get_rank()) + report("epoch", self.epoch) + report('step', self.iteration) + report('step/total', (batch_index + 1) / len(self.train_loader)) + report("lr", self.lr_scheduler()) + self.train_batch(batch_index, batch, msg) + self.after_train_batch() + report('reader_cost', dataload_time) + observation['batch_cost'] = observation['reader_cost']+observation['step_cost'] + observation['samples'] = observation['batch_size'] + observation['ips[sent./sec]'] = 
observation['batch_size'] / observation['batch_cost'] + for k, v in observation.items(): + msg += f" {k}: " + msg += f"{v:>.8f}" if isinstance(v, float) else f"{v}" + msg += "," + logger.info(msg) data_start_time = time.time() except Exception as e: logger.error(e) diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py index f5e5f12a9..18578b429 100644 --- a/deepspeech/training/trainer.py +++ b/deepspeech/training/trainer.py @@ -14,12 +14,15 @@ import sys import time from pathlib import Path +from collections import OrderedDict import paddle from paddle import distributed as dist from tensorboardX import SummaryWriter from deepspeech.training.timer import Timer +from deepspeech.training.reporter import report +from deepspeech.training.reporter import ObsScope from deepspeech.utils import mp_tools from deepspeech.utils import profiler from deepspeech.utils.checkpoint import Checkpoint @@ -27,6 +30,7 @@ from deepspeech.utils.log import Log from deepspeech.utils.utility import seed_all from deepspeech.utils.utility import UpdateConfig + __all__ = ["Trainer"] logger = Log(__name__).getlog() @@ -98,6 +102,9 @@ class Trainer(): self.checkpoint_dir = None self.iteration = 0 self.epoch = 0 + self.rank = dist.get_rank() + + logger.info(f"Rank: {self.rank}/{dist.get_world_size()}") if args.seed: seed_all(args.seed) @@ -223,15 +230,25 @@ class Trainer(): data_start_time = time.time() for batch_index, batch in enumerate(self.train_loader): dataload_time = time.time() - data_start_time - msg = "Train: Rank: {}, ".format(dist.get_rank()) - msg += "epoch: {}, ".format(self.epoch) - msg += "step: {}, ".format(self.iteration) - msg += "batch : {}/{}, ".format(batch_index + 1, - len(self.train_loader)) - msg += "lr: {:>.8f}, ".format(self.lr_scheduler()) - msg += "data time: {:>.3f}s, ".format(dataload_time) - self.train_batch(batch_index, batch, msg) - self.after_train_batch() + msg = "Train:" + observation = OrderedDict() + with ObsScope(observation): + report("Rank", dist.get_rank()) + report("epoch", self.epoch) + report('step', self.iteration) + report('step/total', (batch_index + 1) / len(self.train_loader)) + report("lr", self.lr_scheduler()) + self.train_batch(batch_index, batch, msg) + self.after_train_batch() + report('reader_cost', dataload_time) + observation['batch_cost'] = observation['reader_cost']+observation['step_cost'] + observation['samples'] = observation['batch_size'] + observation['ips[sent./sec]'] = observation['batch_size'] / observation['batch_cost'] + for k, v in observation.items(): + msg += f" {k}: " + msg += f"{v:>.8f}" if isinstance(v, float) else f"{v}" + msg += "," + logger.info(msg) data_start_time = time.time() except Exception as e: logger.error(e) diff --git a/examples/aishell/s1/local/train.sh b/examples/aishell/s1/local/train.sh index 5b9c45f50..1a341de76 100755 --- a/examples/aishell/s1/local/train.sh +++ b/examples/aishell/s1/local/train.sh @@ -1,8 +1,8 @@ #!/bin/bash profiler_options= -benchmark_batch_size= -benchmark_max_step= +benchmark_batch_size=0 +benchmark_max_step=0 # seed may break model convergence seed=0 @@ -52,4 +52,4 @@ if [ $? 
-ne 0 ]; then exit 1 fi -exit 0 \ No newline at end of file +exit 0 diff --git a/examples/tiny/s1/local/train.sh b/examples/tiny/s1/local/train.sh index 56ceab41c..5097d4d03 100755 --- a/examples/tiny/s1/local/train.sh +++ b/examples/tiny/s1/local/train.sh @@ -1,8 +1,8 @@ #!/bin/bash profiler_options= -benchmark_batch_size= -benchmark_max_step= +benchmark_batch_size=0 +benchmark_max_step=0 # seed may break model convergence seed=0 From 054e099b282d697a4c16c5aaf98fafc47b9b938a Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 06:26:35 +0000 Subject: [PATCH 07/14] format --- deepspeech/exps/deepspeech2/model.py | 4 ++-- deepspeech/exps/u2/model.py | 16 ++++++++++------ deepspeech/training/extensions/evaluator.py | 2 +- deepspeech/training/trainer.py | 19 +++++++++++-------- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index 8272d72ee..7bf029300 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -35,8 +35,8 @@ from deepspeech.models.ds2 import DeepSpeech2Model from deepspeech.models.ds2_online import DeepSpeech2InferModelOnline from deepspeech.models.ds2_online import DeepSpeech2ModelOnline from deepspeech.training.gradclip import ClipGradByGlobalNormWithLog -from deepspeech.training.trainer import Trainer from deepspeech.training.reporter import report +from deepspeech.training.trainer import Trainer from deepspeech.utils import error_rate from deepspeech.utils import layer_tools from deepspeech.utils import mp_tools @@ -108,7 +108,7 @@ class DeepSpeech2Trainer(Trainer): report("batch_size", batch_size) report("accum", accum_grad) report("step_cost", iteration_time) - + if dist.get_rank() == 0 and self.visualizer: for k, v in losses_np.items(): # `step -1` since we update `step` after optimizer.step(). 
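
    How the `report()` calls above end up in a single log line, as a minimal
    sketch (`ObsScope` and `report` are the reporter helpers from
    deepspeech/training/reporter.py shown in PATCH 05; values are illustrative):

        from collections import OrderedDict
        from deepspeech.training.reporter import ObsScope, report

        observation = OrderedDict()
        with ObsScope(observation):   # report() writes into `observation` inside this block
            report("lr", 0.001)
            report("step_cost", 0.25)
        # observation -> OrderedDict([('lr', 0.001), ('step_cost', 0.25)])
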
diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index 68b001ca6..2e512ef1e 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -34,11 +34,11 @@ from deepspeech.io.sampler import SortagradBatchSampler from deepspeech.io.sampler import SortagradDistributedBatchSampler from deepspeech.models.u2 import U2Model from deepspeech.training.optimizer import OptimizerFactory +from deepspeech.training.reporter import ObsScope +from deepspeech.training.reporter import report from deepspeech.training.scheduler import LRSchedulerFactory from deepspeech.training.timer import Timer from deepspeech.training.trainer import Trainer -from deepspeech.training.reporter import report -from deepspeech.training.reporter import ObsScope from deepspeech.utils import ctc_utils from deepspeech.utils import error_rate from deepspeech.utils import layer_tools @@ -207,17 +207,21 @@ class U2Trainer(Trainer): report("Rank", dist.get_rank()) report("epoch", self.epoch) report('step', self.iteration) - report('step/total', (batch_index + 1) / len(self.train_loader)) + report('step/total', + (batch_index + 1) / len(self.train_loader)) report("lr", self.lr_scheduler()) self.train_batch(batch_index, batch, msg) self.after_train_batch() report('reader_cost', dataload_time) - observation['batch_cost'] = observation['reader_cost']+observation['step_cost'] + observation['batch_cost'] = observation[ + 'reader_cost'] + observation['step_cost'] observation['samples'] = observation['batch_size'] - observation['ips[sent./sec]'] = observation['batch_size'] / observation['batch_cost'] + observation['ips[sent./sec]'] = observation[ + 'batch_size'] / observation['batch_cost'] for k, v in observation.items(): msg += f" {k}: " - msg += f"{v:>.8f}" if isinstance(v, float) else f"{v}" + msg += f"{v:>.8f}" if isinstance(v, + float) else f"{v}" msg += "," logger.info(msg) data_start_time = time.time() diff --git a/deepspeech/training/extensions/evaluator.py b/deepspeech/training/extensions/evaluator.py index 5137dbdde..1026a4ec3 100644 --- a/deepspeech/training/extensions/evaluator.py +++ b/deepspeech/training/extensions/evaluator.py @@ -20,8 +20,8 @@ from paddle.nn import Layer from . import extension from ..reporter import DictSummary -from ..reporter import report from ..reporter import ObsScope +from ..reporter import report from ..timer import Timer from deepspeech.utils.log import Log logger = Log(__name__).getlog() diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py index 18578b429..a5efdd541 100644 --- a/deepspeech/training/trainer.py +++ b/deepspeech/training/trainer.py @@ -13,16 +13,16 @@ # limitations under the License. 
import sys import time -from pathlib import Path from collections import OrderedDict +from pathlib import Path import paddle from paddle import distributed as dist from tensorboardX import SummaryWriter -from deepspeech.training.timer import Timer -from deepspeech.training.reporter import report from deepspeech.training.reporter import ObsScope +from deepspeech.training.reporter import report +from deepspeech.training.timer import Timer from deepspeech.utils import mp_tools from deepspeech.utils import profiler from deepspeech.utils.checkpoint import Checkpoint @@ -30,7 +30,6 @@ from deepspeech.utils.log import Log from deepspeech.utils.utility import seed_all from deepspeech.utils.utility import UpdateConfig - __all__ = ["Trainer"] logger = Log(__name__).getlog() @@ -236,17 +235,21 @@ class Trainer(): report("Rank", dist.get_rank()) report("epoch", self.epoch) report('step', self.iteration) - report('step/total', (batch_index + 1) / len(self.train_loader)) + report('step/total', + (batch_index + 1) / len(self.train_loader)) report("lr", self.lr_scheduler()) self.train_batch(batch_index, batch, msg) self.after_train_batch() report('reader_cost', dataload_time) - observation['batch_cost'] = observation['reader_cost']+observation['step_cost'] + observation['batch_cost'] = observation[ + 'reader_cost'] + observation['step_cost'] observation['samples'] = observation['batch_size'] - observation['ips[sent./sec]'] = observation['batch_size'] / observation['batch_cost'] + observation['ips[sent./sec]'] = observation[ + 'batch_size'] / observation['batch_cost'] for k, v in observation.items(): msg += f" {k}: " - msg += f"{v:>.8f}" if isinstance(v, float) else f"{v}" + msg += f"{v:>.8f}" if isinstance(v, + float) else f"{v}" msg += "," logger.info(msg) data_start_time = time.time() From 9fb349f9355b63a41c04471f928e1bc27c46bb8e Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 06:31:21 +0000 Subject: [PATCH 08/14] fix benchmark cli --- deepspeech/training/cli.py | 68 ++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/deepspeech/training/cli.py b/deepspeech/training/cli.py index d8719b3ab..07c213dbc 100644 --- a/deepspeech/training/cli.py +++ b/deepspeech/training/cli.py @@ -43,25 +43,57 @@ def default_argument_parser(): """ parser = argparse.ArgumentParser() - # yapf: disable - train_group = parser.add_argument_group(title='Train Options', description=None) - train_group.add_argument("--seed", type=int, default=None, - help="seed to use for paddle, np and random. None or 0 for random, else set seed.") - train_group.add_argument("--device", type=str, default='gpu', choices=["cpu", "gpu"], + train_group = parser.add_argument_group( + title='Train Options', description=None) + train_group.add_argument( + "--seed", + type=int, + default=None, + help="seed to use for paddle, np and random. None or 0 for random, else set seed." + ) + train_group.add_argument( + "--device", + type=str, + default='gpu', + choices=["cpu", "gpu"], help="device cpu and gpu are supported.") - train_group.add_argument("--nprocs", type=int, default=1, help="number of parallel processes. 
0 for cpu.") - train_group.add_argument("--config", metavar="CONFIG_FILE", help="config file.") - train_group.add_argument("--output", metavar="CKPT_DIR", help="path to save checkpoint.") - train_group.add_argument("--checkpoint_path", type=str, help="path to load checkpoint") - train_group.add_argument("--opts", type=str, default=[], nargs='+', - help="overwrite --config file, passing in LIST[KEY VALUE] pairs") - train_group.add_argument("--dump-config", metavar="FILE", help="dump config to `this` file.") + train_group.add_argument( + "--nprocs", + type=int, + default=1, + help="number of parallel processes. 0 for cpu.") + train_group.add_argument( + "--config", metavar="CONFIG_FILE", help="config file.") + train_group.add_argument( + "--output", metavar="CKPT_DIR", help="path to save checkpoint.") + train_group.add_argument( + "--checkpoint_path", type=str, help="path to load checkpoint") + train_group.add_argument( + "--opts", + type=str, + default=[], + nargs='+', + help="overwrite --config file, passing in LIST[KEY VALUE] pairs") + train_group.add_argument( + "--dump-config", metavar="FILE", help="dump config to `this` file.") - bech_group = parser.add_argument_group(title='Benchmark Options', description=None) - bech_group.add_argument('--profiler-options', type=str, default=None, - help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".') - bech_group.add_argument('--benchmark-batch-size', type=int, default=None, help='batch size for benchmark.') - bech_group.add_argument('--benchmark-max-step', type=int, default=None, help='max iteration for benchmark.') - # yapd: enable + profile_group = parser.add_argument_group( + title='Benchmark Options', description=None) + profile_group.add_argument( + '--profiler-options', + type=str, + default=None, + help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".' 
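+        # e.g. --profiler-options="batch_range=[50, 60];profile_path=model.profile" (keys are parsed by deepspeech.utils.profiler; exact names may differ)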
+    )
+    profile_group.add_argument(
+        '--benchmark-batch-size',
+        type=int,
+        default=None,
+        help='batch size for benchmark.')
+    profile_group.add_argument(
+        '--benchmark-max-step',
+        type=int,
+        default=None,
+        help='max iteration for benchmark.')
 
     return parser
 

From b381f5b447f53fb6915f1d30328bde8d29c5f27a Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Fri, 17 Sep 2021 06:35:56 +0000
Subject: [PATCH 09/14] fix profiler options config

---
 examples/aishell/s1/local/train.sh | 2 +-
 examples/tiny/s0/local/train.sh    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/aishell/s1/local/train.sh b/examples/aishell/s1/local/train.sh
index 1a341de76..5097d4d03 100755
--- a/examples/aishell/s1/local/train.sh
+++ b/examples/aishell/s1/local/train.sh
@@ -38,7 +38,7 @@ python3 -u ${BIN_DIR}/train.py \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
---profiler-options "${profiler-options}" \
+--profiler-options "${profiler_options}" \
 --benchmark-batch-size ${benchmark_batch_size} \
 --benchmark-max-step ${benchmark_max_step}
 
diff --git a/examples/tiny/s0/local/train.sh b/examples/tiny/s0/local/train.sh
index f96508b4f..9a76c7ade 100755
--- a/examples/tiny/s0/local/train.sh
+++ b/examples/tiny/s0/local/train.sh
@@ -38,7 +38,7 @@ python3 -u ${BIN_DIR}/train.py \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --model_type ${model_type} \
---profiler_options "${profiler_options}" \
+--profiler-options "${profiler_options}" \
 --seed ${seed}
 
 if [ ${seed} != 0 ]; then

From 9a95ceb0b4f605dc7c825c67c62ed1dcc3918f25 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Fri, 17 Sep 2021 07:25:49 +0000
Subject: [PATCH 10/14] add Acknowledgements

---
 docs/src/reference.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/src/reference.md b/docs/src/reference.md
index 341e13611..d3676fff2 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -1,5 +1,7 @@
 # Reference
 
+We refer to these repos to build `model` and `engine`:
+
 * [delta](https://github.com/Delta-ML/delta.git)
 * [espnet](https://github.com/espnet/espnet.git)
 * [kaldi](https://github.com/kaldi-asr/kaldi.git)

From 16e60160f8b733000603eba0eb090887c1fc1782 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Fri, 17 Sep 2021 02:49:36 +0000
Subject: [PATCH 11/14] Kaldi (#839)

* can do frames, real stft

* format

* stft complex, powspec, magspec

* add common utils

* add window process func

* using frames and matmul as stft

* read with 2d; window process

* test with dither, remove dc offset, preemphasis

* add doc string

* more frontend utils

* add logspec

* fix typing

* add deploy mergify label
---
 deepspeech/io/dataset.py                      |  12 +-
 third_party/__init__.py                       |   0
 third_party/paddle_audio/__init__.py          |   0
 third_party/paddle_audio/frontend.py          | 146 -----
 third_party/paddle_audio/frontend/common.py   | 201 +++++++
 third_party/paddle_audio/frontend/english.wav | Bin 0 -> 35824 bytes
 third_party/paddle_audio/frontend/kaldi.py    | 266 +++++++++
 .../paddle_audio/frontend/kaldi_test.py       | 533 ++++++++++++++++++
 8 files changed, 1006 insertions(+), 152 deletions(-)
 create mode 100644 third_party/__init__.py
 create mode 100644 third_party/paddle_audio/__init__.py
 delete mode 100644 third_party/paddle_audio/frontend.py
 create mode 100644 third_party/paddle_audio/frontend/common.py
 create mode 100644 third_party/paddle_audio/frontend/english.wav
 create mode 100644 third_party/paddle_audio/frontend/kaldi.py
 create mode 100644 third_party/paddle_audio/frontend/kaldi_test.py

diff --git a/deepspeech/io/dataset.py 
b/deepspeech/io/dataset.py index d1fe04707..e58e03b4e 100644 --- a/deepspeech/io/dataset.py +++ b/deepspeech/io/dataset.py @@ -76,19 +76,19 @@ class ManifestDataset(Dataset): Args: manifest_path (str): manifest josn file path - max_input_len ([type], optional): maximum output seq length, + max_input_len ([type], optional): maximum output seq length, in seconds for raw wav, in frame numbers for feature data. Defaults to float('inf'). - min_input_len (float, optional): minimum input seq length, + min_input_len (float, optional): minimum input seq length, in seconds for raw wav, in frame numbers for feature data. Defaults to 0.0. - max_output_len (float, optional): maximum input seq length, + max_output_len (float, optional): maximum input seq length, in modeling units. Defaults to 500.0. - min_output_len (float, optional): minimum input seq length, + min_output_len (float, optional): minimum input seq length, in modeling units. Defaults to 0.0. - max_output_input_ratio (float, optional): maximum output seq length/output seq length ratio. + max_output_input_ratio (float, optional): maximum output seq length/output seq length ratio. Defaults to 10.0. min_output_input_ratio (float, optional): minimum output seq length/output seq length ratio. Defaults to 0.05. - + """ super().__init__() diff --git a/third_party/__init__.py b/third_party/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/third_party/paddle_audio/__init__.py b/third_party/paddle_audio/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/third_party/paddle_audio/frontend.py b/third_party/paddle_audio/frontend.py deleted file mode 100644 index 1b337732e..000000000 --- a/third_party/paddle_audio/frontend.py +++ /dev/null @@ -1,146 +0,0 @@ -from typing import Tuple -import numpy as np -import paddle -from paddle import Tensor -from paddle import nn -from paddle.nn import functional as F - - -def frame(x: Tensor, - num_samples: Tensor, - win_length: int, - hop_length: int, - clip: bool = True) -> Tuple[Tensor, Tensor]: - """Extract frames from audio. - - Parameters - ---------- - x : Tensor - Shape (N, T), batched waveform. - num_samples : Tensor - Shape (N, ), number of samples of each waveform. - win_length : int - Window length. - hop_length : int - Number of samples shifted between ajancent frames. - clip : bool, optional - Whether to clip audio that does not fit into the last frame, by - default True - - Returns - ------- - frames : Tensor - Shape (N, T', win_length). - num_frames : Tensor - Shape (N, ) number of valid frames - """ - assert hop_length <= win_length - num_frames = (num_samples - win_length) // hop_length - padding = (0, 0) - if not clip: - num_frames += 1 - # NOTE: pad hop_length - 1 to the right to ensure that there is at most - # one frame dangling to the righe edge - padding = (0, hop_length - 1) - - weight = paddle.eye(win_length).unsqueeze(1) - - frames = F.conv1d(x.unsqueeze(1), - weight, - padding=padding, - stride=(hop_length, )) - return frames, num_frames - - -class STFT(nn.Layer): - """A module for computing stft transformation in a differentiable way. - - Parameters - ------------ - n_fft : int - Number of samples in a frame. - - hop_length : int - Number of samples shifted between adjacent frames. - - win_length : int - Length of the window. - - clip: bool - Whether to clip audio is necesaary. 
- """ - def __init__(self, - n_fft: int, - hop_length: int, - win_length: int, - window_type: str = None, - clip: bool = True): - super().__init__() - - self.hop_length = hop_length - self.n_bin = 1 + n_fft // 2 - self.n_fft = n_fft - self.clip = clip - - # calculate window - if window_type is None: - window = np.ones(win_length) - elif window_type == "hann": - window = np.hanning(win_length) - elif window_type == "hamming": - window = np.hamming(win_length) - else: - raise ValueError("Not supported yet!") - - if win_length < n_fft: - window = F.pad(window, (0, n_fft - win_length)) - elif win_length > n_fft: - window = window[:n_fft] - - # (n_bins, n_fft) complex - kernel_size = min(n_fft, win_length) - weight = np.fft.fft(np.eye(n_fft))[:self.n_bin, :kernel_size] - w_real = weight.real - w_imag = weight.imag - - # (2 * n_bins, kernel_size) - w = np.concatenate([w_real, w_imag], axis=0) - w = w * window - - # (2 * n_bins, 1, kernel_size) # (C_out, C_in, kernel_size) - w = np.expand_dims(w, 1) - weight = paddle.cast(paddle.to_tensor(w), paddle.get_default_dtype()) - self.register_buffer("weight", weight) - - def forward(self, x: Tensor, num_samples: Tensor) -> Tuple[Tensor, Tensor]: - """Compute the stft transform. - Parameters - ------------ - x : Tensor [shape=(B, T)] - The input waveform. - num_samples : Tensor - Number of samples of each waveform. - Returns - ------------ - D : Tensor - Shape(N, T', n_bins, 2) Spectrogram. - - num_frames: Tensor - Shape (N,) number of samples of each spectrogram - """ - num_frames = (num_samples - self.win_length) // self.hop_length - padding = (0, 0) - if not self.clip: - num_frames += 1 - padding = (0, self.hop_length - 1) - - batch_size, _, _ = paddle.shape(x) - x = x.unsqueeze(-1) - D = F.conv1d(self.weight, - x, - stride=(self.hop_length, ), - padding=padding, - data_format="NLC") - D = paddle.reshape(D, [batch_size, -1, self.n_bin, 2]) - return D, num_frames - diff --git a/third_party/paddle_audio/frontend/common.py b/third_party/paddle_audio/frontend/common.py new file mode 100644 index 000000000..7638dae53 --- /dev/null +++ b/third_party/paddle_audio/frontend/common.py @@ -0,0 +1,201 @@ +import paddle +import numpy as np +from typing import Tuple, Optional, Union + + +# https://github.com/kaldi-asr/kaldi/blob/cbed4ff688/src/feat/feature-window.cc#L109 +def povey_window(frame_len:int) -> np.ndarray: + win = np.empty(frame_len) + a = 2 * np.pi / (frame_len -1) + for i in range(frame_len): + win[i] = (0.5 - 0.5 * np.cos(a * i) )**0.85 + return win + +def hann_window(frame_len:int) -> np.ndarray: + win = np.empty(frame_len) + a = 2 * np.pi / (frame_len -1) + for i in range(frame_len): + win[i] = 0.5 - 0.5 * np.cos(a * i) + return win + +def sine_window(frame_len:int) -> np.ndarray: + win = np.empty(frame_len) + a = 2 * np.pi / (frame_len -1) + for i in range(frame_len): + win[i] = np.sin(0.5 * a * i) + return win + +def hamm_window(frame_len:int) -> np.ndarray: + win = np.empty(frame_len) + a = 2 * np.pi / (frame_len -1) + for i in range(frame_len): + win[i] = 0.54 - 0.46 * np.cos(a * i) + return win + +def get_window(wintype:Optional[str], winlen:int) -> np.ndarray: + """get window function + + Args: + wintype (Optional[str]): window type. + winlen (int): window length in samples. + + Raises: + ValueError: not support window. + + Returns: + np.ndarray: window coeffs. 
+ """ + # calculate window + if not wintype or wintype == 'rectangular': + window = np.ones(winlen) + elif wintype == "hann": + window = hann_window(winlen) + elif wintype == "hamm": + window = hamm_window(winlen) + elif wintype == "povey": + window = povey_window(winlen) + else: + msg = f"{wintype} Not supported yet!" + raise ValueError(msg) + return window + + +def dft_matrix(n_fft:int, winlen:int=None, n_bin:int=None) -> Tuple[np.ndarray, np.ndarray, int]: + # https://en.wikipedia.org/wiki/Discrete_Fourier_transform + # (n_bins, n_fft) complex + if n_bin is None: + n_bin = 1 + n_fft // 2 + if winlen is None: + winlen = n_bin + # https://github.com/numpy/numpy/blob/v1.20.0/numpy/fft/_pocketfft.py#L49 + kernel_size = min(n_fft, winlen) + + n = np.arange(0, n_fft, 1.) + wsin = np.empty((n_bin, kernel_size)) #[Cout, kernel_size] + wcos = np.empty((n_bin, kernel_size)) #[Cout, kernel_size] + for k in range(n_bin): # Only half of the bins contain useful info + wsin[k,:] = -np.sin(2*np.pi*k*n/n_fft)[:kernel_size] + wcos[k,:] = np.cos(2*np.pi*k*n/n_fft)[:kernel_size] + w_real = wcos + w_imag = wsin + return w_real, w_imag, kernel_size + + +def dft_matrix_fast(n_fft:int, winlen:int=None, n_bin:int=None) -> Tuple[np.ndarray, np.ndarray, int]: + # (n_bins, n_fft) complex + if n_bin is None: + n_bin = 1 + n_fft // 2 + if winlen is None: + winlen = n_bin + # https://github.com/numpy/numpy/blob/v1.20.0/numpy/fft/_pocketfft.py#L49 + kernel_size = min(n_fft, winlen) + + # https://en.wikipedia.org/wiki/DFT_matrix + # https://ccrma.stanford.edu/~jos/st/Matrix_Formulation_DFT.html + weight = np.fft.fft(np.eye(n_fft))[:self.n_bin, :kernel_size] + w_real = weight.real + w_imag = weight.imag + return w_real, w_imag, kernel_size + + +def bin2hz(bin:Union[List[int], np.ndarray], N:int, sr:int)->List[float]: + """FFT bins to Hz. + + http://practicalcryptography.com/miscellaneous/machine-learning/intuitive-guide-discrete-fourier-transform/ + + Args: + bins (List[int] or np.ndarray): bin index. + N (int): the number of samples, or FFT points. + sr (int): sampling rate. + + Returns: + List[float]: Hz's. + """ + hz = bin * float(sr) / N + + +def hz2mel(hz): + """Convert a value in Hertz to Mels + + :param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise. + :returns: a value in Mels. If an array was passed in, an identical sized array is returned. + """ + return 1127 * np.log(1+hz/700.0) + + +def mel2hz(mel): + """Convert a value in Mels to Hertz + + :param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise. + :returns: a value in Hertz. If an array was passed in, an identical sized array is returned. + """ + return 700 * (np.exp(mel/1127.0)-1) + + + +def rms_to_db(rms: float): + """Root Mean Square to dB. + + Args: + rms ([float]): root mean square + + Returns: + float: dB + """ + return 20.0 * math.log10(max(1e-16, rms)) + + +def rms_to_dbfs(rms: float): + """Root Mean Square to dBFS. + https://fireattack.wordpress.com/2017/02/06/replaygain-loudness-normalization-and-applications/ + Audio is mix of sine wave, so 1 amp sine wave's Full scale is 0.7071, equal to -3.0103dB. + + dB = dBFS + 3.0103 + dBFS = db - 3.0103 + e.g. 0 dB = -3.0103 dBFS + + Args: + rms ([float]): root mean square + + Returns: + float: dBFS + """ + return rms_to_db(rms) - 3.0103 + + +def max_dbfs(sample_data: np.ndarray): + """Peak dBFS based on the maximum energy sample. + + Args: + sample_data ([np.ndarray]): float array, [-1, 1]. 
+
+    Returns:
+        float: dBFS
+    """
+    # Peak dBFS based on the maximum energy sample. Will prevent overdrive if used for normalization.
+    return rms_to_dbfs(max(abs(np.min(sample_data)), abs(np.max(sample_data))))
+
+
+def mean_dbfs(sample_data):
+    """dBFS based on the RMS energy.
+
+    Args:
+        sample_data ([np.ndarray]): float array, [-1, 1].
+
+    Returns:
+        float: dBFS
+    """
+    return rms_to_dbfs(
+        np.sqrt(np.mean(np.square(sample_data, dtype=np.float64))))
+
+
+def gain_db_to_ratio(gain_db: float):
+    """dB to ratio
+
+    Args:
+        gain_db (float): gain in dB
+
+    Returns:
+        float: scale in amp
+    """
+    return 10.0 ** (gain_db / 20.0)
\ No newline at end of file
diff --git a/third_party/paddle_audio/frontend/english.wav b/third_party/paddle_audio/frontend/english.wav
new file mode 100644
index 0000000000000000000000000000000000000000..bb28291f69123209e6b7cc46b584d0a1f2c7bb16
GIT binary patch
literal 35824
[35824 bytes of base85-encoded binary payload omitted]
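The core trick in `common.py` above is building the DFT as explicit real and imaginary matrices (`dft_matrix`), so an STFT can later run as a plain matmul inside a Paddle graph. A minimal numpy-only sketch of that idea, restated from the formulas above (the vectorized construction here is illustrative, not the library code), checks the matrix form against `np.fft.rfft` for one full-length frame:

import numpy as np

# Rebuild the (n_bin, n_fft) real/imag DFT rows, as common.py's dft_matrix does.
n_fft = 512
n_bin = 1 + n_fft // 2
k = np.arange(n_bin)[:, None]
n = np.arange(n_fft)[None, :]
w_real = np.cos(2 * np.pi * k * n / n_fft)    # wcos in dft_matrix
w_imag = -np.sin(2 * np.pi * k * n / n_fft)   # wsin in dft_matrix

# One windowless frame: the DFT as two matmuls matches numpy's FFT.
frame = np.random.randn(n_fft)
ref = np.fft.rfft(frame)
assert np.allclose(w_real @ frame, ref.real)
assert np.allclose(w_imag @ frame, ref.imag)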
zf&6e4`D(55!D`b~Fg|+Vw4b34)KZbC3HenkbxPI2dDkMbY7_2tF+9vb+=Rp9%c*xV zQAVlZi1S^zqMV4rp=ukV_l(Mb6Bw>;$z;-y$<|?$QKzAkN4C$Acl?PCSFw|n^4bks(y&*vWUSJ>Vdo| zlc{F-sICUUGBfb|v9L%bc#?&PTddl^GkPG7ub`SQ#rrImd*yGL2e0DBPrjDD(9i=a-%iZJn*4-rFUN_b5X{>eV&ER;j&2Ub^K@5=5?6|~b+)!o}Q z$}DAuo8z>7G>_WE2icbb7XyW@L)LG5k28j!7G8K>B|41V(x@@pXlm5gdueyrK=?v- z)E*9hY$qy+I6hH?%YWo^`CP50f^A^)5nKD%ac!}7pLu9g)QwfDq56f`T_*?2`S{sL zoa`A~Z4{}8>|UFsBe!wis}XTWP$5R+gk37VidKVFN5pw2#GZ{RxEyhn7Bw>^^2K!GCAML*NP}PUYv9?InA9j{DbI$-LX_fl zIOjXWcTq@|L1k~H7O98owTgueYmgh{HOWby%FQCRILP1d3Zjj8#Q!)Iof*zZ?iU;6 z0MX64Vh`qNRXrNd?rJsNM?#h+yB*Obq_g{=@ki^gRnzv9hhnE~`7b7(j_2_$d^E7x z&M!8qU9^^VU(amxGp3l8-PJrt+*izD`dhk5T^C#V8K<;U)VX6|IV-kx>QH zhm7jO^qKkqy}n*v|EW#Xk`O-y=^M3Lt`^xugg7g!)2}R=r2^LMEsLP4D{=yn^G799 zZRKOJSIn17fMN^K^E8HjMh9>MQJ|6a$R|EESEj{zmf}l!3sF_Bk?B=UctvG(S+zj+ znTIZ9J3OVlEGD}0=T1{6r8C+tY@f1oJFT66L1E_%?U6 z9?f#I7j&NNX%F@{_HB*t_^0yk)N%gA&sG7IT)SvIFzdSNy3)8xxC?m81by(Xa%VQC z)3maYQ^We{AL4)MzZ@uLb+8;u*%x>YwS$({7n*I{_uUsQQ^jlKF8wu8)<5h5)= z1*<)=z9ZVM@_{NVYpbU-D(ggFrDrt18He?(EWPrJZsHSH&RDygUC#N+SIDudDfvi_ zQ^~q%Q?zI;2a6>w@pCqB#=kproz2d7XAok4xa^JY<1Hyk$5Ic?L_8{5?i6ouUS;g` z*1CWdSYk!mv+NR1L9tGqV@c+*unwt@r>~i2MEE#Yc5MhPM;a@ab1tb-;*|J^xSzk$ z{Cf1Kg3s+pvdkFn>E&JKt?TXRF}yW{E(h)L_H=dE7N}Rwg1~kkNj#izJdyc-1v=Oh z9ZhJe9X0i>rsi4Zec(;>G;rrNm*_tBk(89N&KkR={msg3)d`dj^tT2%UF9}XpJmev zxfXbbggC*=JwuHmbiIlZMZ{F6p0y>g)9(tr3GA{?Ii+PKnp-br&T^%2?K0BqhiQ8< zL=BZS#Xcvgsd2d;Qq+w08j@k;AiDp!%MFL;Lv+&%`b!%MUm|yvK2QJ$W zogHF>N}!h6AiR9q2ALM6TO1MYT0zT@V{*Q@WvxwI6aO%-TwJC<|NhGIXJld}`#h;- z+TQ*l2SXZ%912SBW!@a#Ri3J@oZ41-(2n*uN~{*YI4)QGw#1@=Ce9(TTs5X`wNb`8 zS3i%>yDBI$XrHHwi)(@uk#SC6d!7|(tqw#7dIl<4%j``2n#fD0vIF`K^Q3FPySz8n z+aRc;dy~F^6co3e4VFLf%U|4|F|apK+WzEp6~Jojs=m|=fuH6vJLq-UWtB(9a^bXa z=GmXE`&MH+uhWiS5N%X|G-k=QfvAAGnl4`R*1Vxp(Kf9?0hbkG4|2NkouZ_wL0_1I z!n>p$oas}#LdgcZYLE_MxwFrH?f;W-GQMN{r1(Hw>p#uoUL;a`1es)J4;~gaIs9u_ z!;n_qy6#}tNb|Bjm7W!u?A-pAi525Z#X8Tk1yeCWA`Vs%+|rB!aF3(lx%R=%%IIK z)7Yg|Wf_QD+_CJWk%^b%_rxuZ%bUdF)kMFEQJ{H}OZ} z3f~%k%RoA-vt7^$6_e#^(nPCda`!NAHt%wGeq%JfDbCxqtoMOlR$hCX9dG;WfldiN zNJOfiWCeSs71G;kXX#v(PgHTwoUL)Db$ zgVM7$d?Bo{A*n>-vG@k@v*UlnABvAoDCJucXe+j}vhE5Yx5G~*b0R*69`|lCUuuk% zBT{5{S_H29J|@KDH_|2CNR08XaKI*L5w0bkltGPy?t1Tf{sCUeYR=YMgReNonf=5+ zC@IDl>-(HEEpXj#D!!;fELdM*gt+#(yLf*D9S;r;KIj=|v?e{A82>NdKHs*a6aI~X zZPsEtH_s=BlOL?S(ZsdZ{llHX9WbIa56L8`bKB}+Ewa+u%j^WZmh-?l%2UhNY7I@J zebe@7OW9-+EDt*2_DpN4b-{XMEwDaWbL|ZLmN-E=nSVkXrp}+CP5R;~3kAPp%XvNP zus>(gfW+wu6%x89v`NU8upyzFZ+KuV&q%YHB&c~*86IUhOv<8aG^pBpyJ<>ZqsAEtq?UeM9tfx$n#cU&vANY&9U;Lqdxl(@n7CaFiD zx%J(i!57Nf^sUy%tmp3HsqG=|SR+XLrD_Uc_pwd|)&@QYCR!!!2ljp^miLr}$!8j= z)zhA`DRj14Chj@|QOOoq^Q_%g1v{fNkFQomjg}!_Q;bZPCtbr7n}g45TSP6pGIIBS ze#5`dH!*Q&LQH(k1Uqq~Kh8cRzp_5A*q~iusUw2IMet{LSK~eFKzow{s+)LX=L~G~ z#Uw-|Tu4~$%Nz)Gvd9{=qn^=~!IQ=t>q&G!b^S5ZnGKBM`T|yutQDW^c>&_@o^&SZ ztADap!bu^}Ez$nkX``_Fz2~*}ruUX-iF>NqN-s&Di(R(k|Cuy6DW5-eppNy)ZpCZJ zbR>+W(a#yeEasYL<~15={pnJ5SCru|?Kf6q#K11AD{|yv9w8e5bG$-7zloeg|2cw@$mdoXSQot zSlX18(^OA&H9W*UO1-oC`|tV}TZJ6kId2yVFyFlRa`B%N^7tz|S=0`e%xvk&8oVuJ zamc%%AMX1`ZfzP326y)X?86RG$zG6jJK=2n+Jp>A-K^al%pfhQ_0`)OSDJ5gdbg_ zHwUz(q(&PGIe29hFn6@Lr= z^FTpoFyH62u6JGfCS&KzFI#+jG_q>~fn}oCoj`DVPE!S7FO>`1HMu*Zk z^^K>uR{4r09E~rK_%*4t-CKY;q_OOP)=JN3C}Xt=)anXy9WbNJRK`H9BpogHwa98ow$?a&)PUjK#fL|*N zF8d!coKyyXFqZ5j=g2ZrhzwKvB+ zbY}BkqBLo!M|yjQ=Sndk6h@O*CvRlk`bi0z(+Nx|naOR5M;LY08g|sIqI{0K;9T-rVvblLzJhyLhQ9k4_|^XSmQuqWZ-oS#Rn<~^ ze;}3h&Hllcin6?dbKz)z6uE+%8Wk&lUF@*GjXE(OD1CbLpn;(teVQB8#1r6zn^Z_{Ucy@Y*gej>`^U zpf`hWt4@>HX}!AH%yrLo$(7p`W$x79vM(e6Mvd@BP95i(bBR9^A?iPs7wkC&gOd+k 
zd2Q;WJ-~yMWJBqAvKWlacyJJJWDB)jJqN2Z9?aiGxm+%mM`eP{2F8M^gR-)GBo=_J zdM=uygV(@k9mV&Ki8?|<_m)Cr25a-3f8|-lLoryrXIb3Ap*xe6NNz=356)udBZc`= zJIFc0`+?Q3A)@R)Ny`&<#NSQ$C+VA&Ph=!pSd8A#9PE-V$2Hb<+`Mn3HLmHObxUuh zuV$B25q>W)G3k)6Yf{cY7rPhuom#Rsu3!UZJHzP+t(LLH?CM(ND(V_wRyS^IX<1kD zL9Q1M`3!!L7ZESSBycTTWG!T~Qs7;el7?)R#`HN_W#rcR%7e+qI`IBWkQrWrpGzyZ zi5QVb-U17^Qtbz;T~mdlCJdEPh@zv2iYTyGZ@_vzkWJ(|K|~DR=MT>>%EKbp`FWmR zRFTJttv~cm3il-c6PYIbm#4f|NKSHUI2-vOISO-@3(jGGe8RoB$oT1rnFCLqbZS36 zp_MVJm_N*H=n9{kE6mPjO|zX@$6RO})(#Sfx3``p1$;Y`+6UU%jKAj&KftGRm*_25 zkSMJOs{R$%6IWZ;TyvyRK)=A6)8r&XRh7F%VR4@S3wwOw14RnVcc!W=V8#2B5oh!>@0mqilU%!9n_T5wk>)*Z8TrgN zSeyKv{3FpZ4R@q-#NqY;dnLMHL*66Hv@^y>GoR~`S<8$uG8qH)<=Q69$gX1&`c@th zllU&Dt@F%z!}Ea4uSoV{2K0e^CXuuwy+k8fKK6<>qKnB3FufGhlpC1CypWDef$3E$ zWQB+F9K7j?l+vZ{$UgF?7!5ABpLh%|Y@3`WzaevW;*q>OKf+6jx#FJ422Y)?_R!a%ni1T3FN-`A*L&nHm8qNSAN;QC1F_Hm$;3IvjP#kjvR>DTSGdZz0iF6 zHDkQl+*RHE)BVRC?OqHk^atkOZEvx{?TSt=PWUUkomD$f0ciJ@Gec&g`LvOGoF1%R zy@m@V}n^WrR`wwj71U$uSi%%SHZPDktx+2TseQYa<=A@Q^@J5A1uNmf~3 zr+9aK(ZrAbS6t^sS zB=7KcP8p{+? zKT-9cfwLVVeu`?C;{1j^LPSXcSrT3{6WO7oD1vHpnS7x0wHUKTkP~_$YS7$xTC&#Q^$%l0xI{e_qn&)#D}SEAL2DFv-6}Yl@gh!qm&M5@^n(5LHt>c?Mh@ek z9;Nrx4uBJnr?uHZR*&0*sqWuw1?DB~@~utU5*T8)1{?ar z=C&6dYeV?t9V)d!W{|t0J2R{hte?^f=nM7c`V`H>KB@kqtJBZ^$8P3C@}hjX^U=Oz zk98*Uf5inEPTFGL-dlU3z1D7Pf3#wHC%vEER1aw7wDar`JH|NeK&FDd?#mDHsiFd| zznz%MH6Fz|pM#9nm~N-TX-OJI>(ghnF6+-q16>rOwJ^`#fz!_*ZE;of7Ei!_6CTW) z^Gm!eaMyD1+g|a2U*o3e56_s4dBQAkzc1BbvWa~(GX|Fp%M{wt(^s#?3ThjS+^+g& zU9G2T&okQ-16%y@{%lqbyQ7`czHHsF>exM;*LLBAqqBw&pZE(%0X@}UYZI|=Sv3Yix19KZ6CNw%t$*OAg^)I?%9M`Mp zx3tb$er=&PORr~K)!VaZW%C&RPJBf!>@5C;ZJXK^oaTJ2m?itFY@`!e1kSb{O~a0` z5||r&X2I+MW`%jdivOLpjsy;`4+Oo*iFfkye8`Wfc_yBc>pYCN<4^b$QBr=BE!9MI zNTsApbltlw^mXVGRp&NIMQy2~h*Z zDrc}AY0n3HumG9(IG>Iz7=V&yHfct~*a22p%b;Cm$-xTf^aW~e4Jb_J%4Nb2L&yyhfxoLSnu zg?kAx-stD_XuY;((P89|+$t=7gYSi>?sPucUBDeavgbOjcs}t^n;3t`xO-Imu&_LXQ)}n}-CxsY*ocSLg!|UTUPJ!p_gI9{;6ZlL%j$h_O z#WMMbDDAc@t2fqN-<+zaHQ%`Ncwc%Bx>g!OJH!^!wW_8}iHUl8vEQj|UkI!Q<6gvC zWiNLQIRjx~iJCG98TbU<#`gFA>J02njrcBji)1faLUzLE9BKqMm6({Il`P{bkViFWMT>r=U1HPj&gLK4Ib}8#fpJiY#U~;U!X4-kJ(5D=$`gLxmAvQRNXK`Y=#Ny zZy>B*=+VFNJR&c+|M9#SGV5_3DN=~F;;5>qoicm4F6+zKTNYvVbZ>HZcAYlT8>{sg ztpeK&_GK}qv7OZ-@zNP%7qH*jJDrsLvg37Pz?!;|hyD`_fCdLb2a}eTN9TBfz1NoO zhxIndhdZ_LT3gI&m^Oz!gH|aGX@UNrw=5+8h;<^n*vOl}W456a9D&TAPL9Tudx82x z3TV^x)cPm3mOavv8I6sadIGD8DfBLS30jq4=mRdw_HvU*xq7eu7o zjTvZPsK@F-SJp=L#=k9~UYP^ELQeHa?m)jX3W%bu%n0S(Y)p`sh|A&#=8IINr}wp& z#w+07V762v=2o+^nZp>T*VNBwYguhNn+W2jqewKSDAV{mCkx*XJbas%;YZ4H_9*agS-lrz&}8d>*Z;D9i%d#hg~K+0DZ29Mb_f8Y>1wp zBAF~N8{v1y$(-ai>t|d7|96GdmqLrnjq1MSRIOAFJ0n{4n(YXc7JaPnjkH6N~0aagR_}X##3^;tB+z&f1Ku0!Q zPQrA3nrtAm!aJhCT(p!8(Wk7H{mDyK$XID~(o*5-efoB@5vH1HbdMgY7uRmn8RR&r zOT($H3d>KtCLe|PEC4oWD6DZ9QGbXR2ENRSeA);)p$TA3Mv$d+6uZOru!(E|Iw-G} zN6VrGX%|^zc99l_T4^lKHaXc31;vDSqQZ$jFP+}5#o1UdJp#L-2 zEtXG<1U@gvPS76U3gYzo~tC36!#=m-w-dOWp&tU zKD^{N?6C>GS#Nm2LVUJJHj~+98g$F`fYH8*g813>vb0Jf?X(NVdZQ(|2b2TKcd7fQ64^-7CM?w(BzgQ z7Kvn^SY~*`G&YKDVZT`@vc@@9g#8OG))J`4pmBv}HW&DW0`f9650``+I*g3)TR#x; zzli#dWU97VZ^vw?Ny;$8=xCJH9zvJ%hAf~-P&{ozmwrz5f#xSjJ`}^yfsF-riiKxv z1InDJazVKhz*lKukJa#%kJ5`wGa25H3_6Q$a*sTQiF;YOMNCATH-|-MLwmYZ#>qZp zJ_|DTn6->y+IE&xuZ*rUx4B-Q0Usa4rqX=S%p|B2s8YYg9)1yZIuWzks!%MUqjvs) zY0d}5#Sm!b!k|%nMUvwLqu58*LTir9F$Zd{Vyqr(#!7&b{06<=CN&86SW4N5_3ko< z{0fHYHBM(Hddqvz2ZcaA`I6)WcXgjVry1y0+Fq-tpVq?JE;58%B?<71X+Ty#;6?Xj zPnjEcy#?pG1$}xb^qT+T^KkgzSEWH?+YvtV41CU4baUn8ckvclj48P9dWh2B=+A;= zEPOL76fR?AUqsnz=swHRky?yV+x5o?Xzy5*-rOt&hU&C_hRvcEf%c9->7%LE@{(Yp zGT(tI?MvqhKac)quSnwe_#u%8s8W&CP-nVm0Xl^;c*c7~{5tdv&*(^+gLG 
zjh0a{P}n?{4EOXKy1vQ4Efdivlz}?yq&g2~ijdu86MmZZg?uZ@iiPMCf`MyCLC;b_mPO?XLoanpzEnZdH%O==3s>#)!LGBnchTQs~ffLE}3E$~_&-Y#Y?YwdyQ1cl}TcBf!m-hHkAG z>!MZ0l*fa7(T1G|W_4&QG6gEULx_$ussS`d$H2TrL1*H?I^9$b==wb1h5E}CvY~pV zZbJuINiG&Wz;HGN^LG?>_?tf)qWV0NpTT@YAtN6~AJ?lXh3T zV{~-2Hpl8e*i3Dgp3zvUm(#-OP*nhI*&k66ezslI6O~|@a;P!;#00q){aYLI1a)P; zV$d!&K@LoYp6dkEPv4+H-$}N>&)bpeq$Jd5?V!g@BAIA5>c{^yfwrtTG`9V~Rvks< zJ%+k?0O$Q#{zCpsM895#9A^J&gTN-IhL*`@!?gKq7;@TaXqvO2cKX0yHAQ}mM7L1@ zn%gh(w3>@Kz+kch`pA^3HZ)3qnb1D+0+@w`LX!oNZ?mGFOh?sSiL9{$aaRPq;X5!s zhw$Df|IZO+#P=FN4fupCWkqyp)Nu7SH|jrGCC$*c=zaAQE202@rh>s=R% zP-Xj~7kmxX&=EC=%pw`cY9O8g$T#OmZs=hLLdDq#s=CzB2YrCPIRV<|m6%}cBhT^w z?V$;8K!3nSZ&0UqBR`(U&q`pVeBk7ogQ?02D-S?+zRcdSyU+l(rybY=ZJsucT_;_D zJ5s>UVt}(=;LaAwPcjuWvz_4;jqo1hp~S4Mge-!ecny`?ZRy}@2LkIAM|}3g9h3x1 zxEq!I8R8@(w5BES)BmAv4@3m-K=j;2%{&B4FHwo)9~PuvGeo_es%1-20(@8}bk6}1FZP3JuK-_Z1RkskrW1cP+e=gh;KJ4Q#vA>rKkJx-WR}XwUq=h z84bn!DzM;raIzUtb%T%_i=i&Xz+28p4|Waqp>h>cu^_`MG z(XD=#TY(Jvp@s}lGiY_Ky8b{L&U(pgBpdb z8-i#l3{AF9=aRMRKlofz%n#n9IzN^}P+6t{6Q9I6DmfSaIR;fD2Jv@A76Ou*hYYq8 z9nJ}ngbIEaYPg(GWv9T5<1@*}UTSU4(XK|Wjm8jtCy;j~t(hiS932eB;4@(D>0%yu zjV`DbMddy)t)s*fu@n4SZ7BL@gYB&jwl*ECpAD*eFr3I3MWN=+iw-LoQ?mE;6J1a1 zpeug_tNC&Lcc88R3DxE~)WF_|&lu$WcgWlS$=1kMJLv;V6|b{z^grO&2lQWBknV!c z`w}v3Ms)_t@GsCY&xTsx#+JZ#Xg;q%iCYWr$*?Dq8Cvk!sIRqAjoYCLQ!tNj(CthF z!p*NL!n=xrO^iZTCx{>muXYU8x;}7ADd5`kvH&vwOLdqm$LVx3j+x0^?~S>7H_eai zQUo}>AZq~S{A*x|rudz~zyrPTK6y~#uM6`3N|qTIKA1d4?MMdJ_AT~A;=p@e#O}me zG7!58ZSk)cD&ZsaC;dhb(q`DMI6#)dS`$bQWZVa+rw(Fy3A|<}t~ySpL9|n-k9xAt zEFJ3!X0S4CNm~QEo=3+10Hl@yXD|#?q_W5{8uoPV0M}dsdR_vb`2r2-aYXcE;P@G^ z!zRp@&ZEoOEUzP~BG4tK!?sBsRUcaPDzNZ3eE$@F_9z(Pq!h91Lal zTX^huPfZ9`o*Qf=}Jp;~JhKltH8x);MZ$xJ{OyhPVBjiC<&k6;5b>!=__$m{6r(@{* zM*&m+!W3pKG{argdgSwKxQ-Fv+dHBvt;R%ZJS;L0{9D+JqL1TIymB^_{06;T`0;%w3ZRTaW}4ui`1Hu8^#%mb1g9c?P$ znR-BpGszBgRy&ZP7eUoO4qG~7;Z0M>e0*mYcDY(%_JK_t%q9n7vuY+7>_|+Ymf$Wa z5YJKgPcSO#QXq{ZuviCd3DrQpJ%!V$ij3YGHrR#jlX=)r=z}QS0^d4^EsjZ0ZJ)(w zTaj%#pq4hpYt(@?t02aTLC@V9pH~H%I*IwyX;P0KqC06HS^?Lx0J(AyEkiSb%Pj(S zYAG}dln#QI{jg#kLZDx;NHFf%PvG@G{Wgz#CsgazwPib9PfAnle+%!m7%Cs>kwz7a6VJu zM@MkK+u-#RkfRpix5uK&H-Ufrz3OT>#R~BIhOpyg+}nL*^aUg(T}F4%?yyV+)R8gx z(~PEqZ9K3|cf?dUoes;?1CL(|SS*~}Mc&?`{_ZRdzz&HI6H*EvDThk;_kCL9YMbJ- zR=^h>@l&VJIWm$KaaDxmCwUM%6=C6)Kw%59ZMg=vSptt3Lb?FKH$$AYgExh4EI9NA?DEb$n5@(B2%VW_SLaVKwJuMM!;EL__@SYSD9HC6qC ze7X|Wm4*Mg1rr=fQlTl<4@KA5$pl?h{dAd>yC#NU|?lT}ckT)6VTnDTGE&H;a(3+)8OZ{7y>FO2@%!B1p> z@9bAiNGN?o8?(D?wpLBwpu6;xT0HLP0xHN~KA|e?atfT)EHGTzWnEyf1HgtYz(3{! zZb*fEYO5feW-;Uqj(GVFzevE25k=J0MpoE`GfluAUMzW!^Nm4IxdOSP26&JY@c+E( z2VQ*>I_twgF%h`ZdB_jDaj!X1_Xkm(RYZ;SVsl^*Z2`|+j7;N4wb_hQt%D37hFz9) z@P_Hw`pJQs&=!$c89S3f<_lHae<<@QbpDseOpbcjqB zeTT35z>eS07f(he%8I%3YvA*P;DxSAzbuEm_z+uAEfC#4G60==K9(OdimP-stw9rz zo&WA;v_f9bh>ya+K2_mk-4L&j(1mBid6j{6Y9n7~f)5>r725pYKH;cgLFg(AAiGXN z<#>*H8^_ALVR5s%K*fu6)Kec*obz*dc5_tdCcAHV}8(GJ=keU-7@_@%$s zwxO48K#Hj1@)cBS23U*dvgqIaR`n8=8iw=Eh|K3h&A(16fQPC<-y=s{M0Wp)`;5gb zX9n2q%BV6I;5V`4B~ac@MA~I+yZzNcTtn~O1f0nnFqfCXBh~_^+#YB94s6UE}? 
zPlw!e7;)DCwKE9)>MwXs7^={i|96@cu~-y&uP9K!b;MUW{KN(L@d9X*j^NaSpgm}V z+%+AZ(h`53s{`aTEw0Vemm3$2YQ}KAqLv8U6{Mbv-@FB;Y$50{Zpk5FL_Q$5YyxgL zh|?H{T%!Zwh9ZYgL|%zO#{C!gdkuK7R$$RQs3^lodUOl-=tJCNYsz7rmWaz(*zgNp z>lfJhE*y z)RhXbbXIuNG-SYJsDY#K9>>tbj3d#A|DEug5On8HV9{WBUprXzHc&+bI2;{W;Wv8q zOSq%az<;6Y6_DCac=TTM-5-zz^8p>s#ubjoJKo0TcYUDsQ`%>JoiWBpX;jj4X^&_N zvQ-Wi(fp3{!g zF*}AjRs?LE9jrS+MhU_;eL?)2$IH@!&}Z;m-c<$uEYb z3LvwOQdtQ{_jw9Bi(oUOdCUklu4{GK60%OF7rPxF^z~hxhJ1!Nji()q2E#cKSh^7= zTjkLU4h53<%QJQZL-7&ObqSa@oQwoZ+X;+XJ}_$4&>zN-!89FP0JsG&3`-r#>VA$KRP&S)BK&ESqezloQ4i+~8 zzkg3QLAGuHe0>E764N$d?wZJo5_{yw;LSTxujirWSHza0gTDU_7``_^_F0hG76U0g z17dg%J>)dxABC!a1s`|8q$}`3*g-)b60hcg6$oX;w2pdbFKG77aOaq;5Z*+@6u+nO99Y27TGQ$(vp#CM2 zKeV9MK@T_f>tT9s?13!BCSn07TPEA-?b-HP=ObSwvyg6>@Q=~B_Fb!}<-uv_?K0pQPPrf5hA3@yzz@#fBc#)rK z1zoQ#GTg52u3D~RW?kc&c9GUrX~ZAGtPCwxWk93f(Vqsu=+CYo>4?4d( z@XZKd@B%EoHj!9=##HJn#~jv=Gu7XHVMvXiXi zuuEz2NOmT_XdA7H{;ysN8|Sf@&$myWkAqMIIsioIEE6+wvfOr#PUi>X_BcV~AQ_axVSqnv(}{*aaUTWek* zM_@!?k(JkZ&QHo-K$69^tJ+!2T7y|A4aT-=dEn*xK;B{Cu!_-vv_E}?8Vd#u8q`jr z7MMa?yq5|yi$Scub_Kegq1tWsk{-wGY^975xuJW`3~sI{u4IGigKWQt-oiX~AMFez znFU@M4rZByPyG-5SY71Qf$BZbYF6wj=SDQ%z@%S5HChNr;0BnV1(?W8#^kq{+Jy}D z9dql)A`EQkWtkQ|!YpKf^@ybrh^V|kM_&*>Pw~#Nz!*vJq8H#U&Y@;k2bQ~tUG;G+ zmA)CeHtJsLdSQ+>7HKKibG1dxat>RU1KR>wtRD7PXM{+hhLUUaI4h$Kz*cT!wi%ss zHz30lU^QA`k~|XVrY-vRgRoX%Iax%D>S7!2`Lleeu9NTd2b-ro*7j&wwH~ZLIJ7Bh z6PVWN;1hdcf?gh1RSwtv7;3kMq!FstVN9H|>mRj&+EvW1B%Xe71=`7%&U>c}6t2_3 zyG>Ag$qhP(T||Y8V2u!G`+;TJgV{QZbH5KgK!4zXkGSWnsHFoiRoH;&E{RHY8C*eG z^o@*kRVSdJTZ9eCC@{+n<#9Ea&eX!q*6u=H?(OVN;dyT+Y1L>pnaNocxRjJVXH1n(OGqcV^U}`eWaPtz zkef*fwnMwDmxn5Hi9TG*$!_5JCYxk}*bEih9Z?MNUH}zy+5fxc)X1fKup3P=#~a4( zv2kLHO~tT)m-fGTJ|13R}MnGTc=&p8yaPd9pJ;ev7Gd;*5f1k1_vkB4Yn8v z%=J`u#=d$JOmSMsI%GMUp*JmOH@9tOsi}8g&5}NtPEq z`EP6ow}eWps|saaW4wDzP|uLlA>)Fpc}u${YhP6v{wz=DeZNESJlcmIMeg-P+G9ooJa7nPoDJ@Jtq3K!) zR-LxSQ=Z-dH}Apu&O_B`huks_m~A8wNIlHtv&lio42{)S)g7qnHJ-DQ8qYBrFP~s@ zx;QorW~19&i!5;s2yZ`XLuY8Tm>AAOxzDKrwf#F0s#cTf?4n-Q8xYTD*zvz(D% zyH28Ejgqzq)Cm+0@W4R34sS2>;aM#m*cnW2&q24+ne_mgy~Q#?AJ|VT3ysq;JjZC7 ze1_ezRydkr$XbC3Y_=!+yWOpp^jTT@0jm3w1vNsI~$AgyZR1<$+zg|G)2j zkJt$YOVJD&>@BkB2Vk0Gu!AOwi37+r;qbt1z_EkT=M0sHpeC(`-QYre1<#AEgBZD2 z6{A_SHO3tG$)G^U&Cs?XKfUW+$@H~km~h)o{55?lakH@cbAbGD4`DgL%s{D6VgaFVIJB#%!|(xT&)0 z4y+@@E>yqt(Av8&SzQSq>;~)~h52a%I!!xmp7Ev%{Sa0xY;!R4NWCKcB!=4y{Zo7% z-+SK-|8lF7(+3+cU!hFBjb}~mlM~^;uk|3;J$EkeDQ{cvO7|6G0L!5I@YA+w$6J~0 zC3bme;AV0^4;9a#=Kaohi9~su*esQC*}UlTxoW$@&B=NUCRZiluZ1xgtc|;Qik{`N zOaUF+cm5Byt8$CHatgN5YOo^OQsku=c8`7n(wPAaR0}A4F}R@r!1@+YD=;|H4-tPs;e}e{L;=B6t{&b-FkOp}ZU}DA~hm8)sanJqg|xL5scn z-Rq1pEQh+tyF1~~A0Kw!II+$ZXB?g!5wLICx9wrhEAGb=N;2tP%onbP?l{*&v$1g! z^|?5-;$wk_IA#Fr;pn z^Bc_PA9#9Nq`Q5AVDbhjrv@0_*H1G|Jxpbh-R zZ>p2GvUQL?}g=bA@sdH*S@pOuNq@bEAemQ6C@78wXM8M+KL=&Zk21CXzPdnmj7np zh_%&D=6ti;+Ec7xYk(E%Xfg|p)JH%``P(zwo8P<9UEDmZ&7f}LLX=j<>#v6*bh=Z) zu5En@Bn0YPm2jHH<#kd_a~pq*!X`I{7>D#YptLm5s%+7umPW6lg|MQesI0~h+aIl? 
zR&#r%^B8;Ro7D+C5pFH;!YF*~0VCI+ErS-nH#>}J8lLzk^NH@zNXI%wuun4-dq?xo z*I$8lr!5p3q2L>$ai>*q-^ryH_He**ycIgC^ii1`8RQHpqFph225k<#88#<$L{LHZ zbiEKYu0VO3h9`r%cC!_2d96OyYI_8*M?qRiyRGjwJmv$VJ~mAA>RYt*KuSe` zni^>bproR7KVo2|lfhYGudqvFV?P~sZfBD6m_(gMRZW9gY!jN94M101i0uJ3uSx2v z@8SY4&C~EU$T)?dt1J(d!cr(CtH1&&F;9PqP0iY12Im4_6_?@AkrKp30`!I#kgtoA zC-k6R!1GVYvalv$TY_JCwwlGY8`vq^YURXE_k@7g%57^-E<71xj#b=xZuN5h72A}b zPSrgx!Fk9SrI%@XpBj4(*z%KGsUJ+W0 z1lbJJB?2YWMP#STsJj#RRbCDCvMhS`w3xMQ25Q)Zn)sEjG_x%B*OfGzGG(oYsHV zU1u)(<=UP}zN``5B1ZTRc*kp%l|y0;SFzWbz0HecAkE+pYs~6&<~XAYqgN5_yl!WJ z?ebDtsP*;C@D%WLVjND&O{Hp7^ykFA%!gyr4%7gh+!Q;;YH5toeR@F8gPP-=Gn1-j z0og%L^hRCqDR@sNe2!k+8#2T)!V*x5z4lf#Y|&_1@}k$?Z~bWx0>z1PL&X1d)H+kF zY*u9FwQrBy$U#w)qH;v_i;VYo@m!GKyKAk9di!9d)CQ@o zgKDU4C@FMA_wc?&tv=Q>tG442EU|<#NSp8d#~0~8?Thg}@F?1N`M$8-Y0K_(-fUr3 zHtU#Qp+N{3Nko$vvo?JBch)Xvi_lN9;VTSdXUOcCuWbWM+l|I`rhH2JO^OBWj~7lk zN6w$MWa#^T>Uw&su`pON`g0UWOCHT=vvX| zqgq7H_I;&AO9Py;=F(8P;I7mz!EB)>p>3fOM1{||E|o@G5yQ?i!QCeoQr0pGn|yiw zIs6H}2HrB-YPpIy3)E`}YT{O?T0Ai8`>>9V7(=B{5nBlTDItJ6>j zjMYqayc(xwz<73*QbCz0pJSbO5=^Hs%9gT5Q$4rdSTASPVl_2#4zqf*v5Tg_-a0M~ zg-Lz^tU9x}oSZVj1>?0pp-9+d{$h?Wmzf{UA1yzbvUL8h*!uZ zy*QX>VY{8Hi-(n#9=|WiSKXh-Kiuc_-cx77;A%j%xRAA+jKKvZGit4Va5f5Sg+0YQXK=bHEG>4^+b{IS!Az;^ z0&h}#hb|i}>;~>NVUM^(Dj^q_pQ6xwicWJT%GJ8kLAo24+@8)=YnHjy_{#7brHrSB zZMvwy-;xJBrJCAmU14^9LC&_B-2bZGkD9up^htT(%@p-bOu5*_F-M}yMGf=U_e__I z3$^U=#@^6nJT*pz3WVka>j$R?!wA*g&`AA)ansV={$eTRv*vi8`CQ*sUt8ZGZ<6*- z$qkSEHr*wYT7Q8(&uV1)jUxKK&~KqZp+7@g^cH4qJFojd7!G4{yHZeXgf8<}wYMs( zJMqtYpsn|O=UIs_iXzEEPL#sa^=_dj!9JlZ`dOo^b--Tgl!D2WNxUakr;eXbUUghP z0rN9K8bbwk#A#|5w|d~=@EKj!2lEX1^7J%LRAc*p46^@V_ zan)@uyV|&js?pP8md3czBcqo18!-wm-SO5a{YbDwumXNtk3+peYA7aDKGZ0*Ce)GZ zxoXj~kh-evQQyq;XYt4Tx_NVZepde>@9n`VO^?gy1IE6sHPpBtdKp|Byc%p7YOAB( zvvZ<1{a%_Vmr&{}6_ql|R3)giRePz!)SK!YG~h)&ca?Hd83FHGYEdh+DRe$`Kp$tw zXc;qtD6MlwyK99@;%TyqpgdC9gVN-MG!1!Y0^Vww@JO0&-ZisZ<5>y4c)hpAS~bxC zmc})!D(m48nRJBR*$&y`sTDd<&y^HAD1Ck9qesNXqzlE=j_w$7%#%YIE&OU{F^A%! 
z(MKO-Ofs73%Yw~PF9co%wx?zfl`}kcZo!rgtNFa!dh(UfGVq z^oH=tNwIDk2SeF|qf>pszM&{%l6l>VaBR9I8NoQZinXbcyC}O=)syV`;3?#JrEXIt zd|MvKy~W4wGE~(Yt@36k{I$NA z(qYTX7-xKIwnkSy1#aj_ zGW}xiQ{je`Nx7)J2ebY|&MB=D;u(X6)SKyM{2#rER4@+w%t?Pd)p&x<>cZxJa2bX zsc!znm*$<(z&M19Q!XQqSo7#7* zHOvBj>5YO92mVtV-6!5W0OWP7U&t7EK{3qImIClLqTS%P2ctq$^k9~}`B9ks+~ zYGc#_YBX5sW4V(w5!CFa`AE+YTA#Wha4Rq~cvfF;ma{gRZwmoJ<>pch+V|{u*g2@zmys(>HM5xqs6vXN4o$U+g9W`6ijggL zhb24;Cjdq3jW@*ao(>G;+ZTDc)S zm|EaWbcD=fx0#blyED(_7M=|?sORE@n<(6()S+s6btWUJsf*Po>MPYz66Euu4>$9C zP7-^6Z~HzG|CTuo7tNREHtUFe(CH1jyn)`|A$a6V@Eson;{%UH`cA9q3qOFZ@s+zR zP4#ewbyX1;)<1B3>0%W_tyqstvN&qyRd#-|$wHD(tL*FMTjeRQRz)xTJ1%asy?=Xd zsDtHJ;sQE!9jQ!jv%=<}CEa1H)Gvolh1Tjh&4tz|6naq8KrDR}E zTt$ankNL4i`BgFGLg=(ie51$G&lTK*RJoJv!(@9W$o4Amq{@@s90w0N>C6&j*fod9 z5bxm`^sBN`>81RG;&!|Am9!ey-yG;&SI!RS3aFl9?nE6t*g%1dcQkirJ8tpt^;SO3ohRnhS3?65buaL6G7$6p!xZU&wgePE*ASqkO(tEK_ z#ljfK53j!j+@SA3K|YF8=z^VB(a+1^)z2Qso7IjfKF%ce0ZJZgZIx*DzGR^`A z&_7m&8F4wS3Yd!j`(04pzTzojK|Z({Q^g^$xL@IJ(Va@F8NI;Q?0U=S!&Ju0=i2k+8s?&@u9gD{=46=6tpqSQ1K^pDe@b zO!X?->zV7@t=4uO_$=?yEX;@P7Vao^KrMVPI{WLFAjrF&u)}jc*m0wYXno;aEyHo(DcE!zx{5^5whr{nCZ_QT za==&KNY}e9m~-Y2dKs0^wqZz+w;Tm+6)ux2bcvJ@qW|cna+Vv zmINoMDPw*o>J!z`ScmY%{$%V&a_aU_uF~?M~C( z>(8vr1U^!O=&Hj6i(~bEbPl+?#a4=@-Bv}Vs8o)qY)aFVMD>b#MF~nFVGKF*aq7Kf zkPk(OcVekvcj6uKyZr(@>kbhk3;wS@;!icYDFW;C1vTv=6s$kPhbv6C!&J;H}m$g(-f3B0~*bHaJ9m`y^`>W zJg~GUz|tuV^CrwB4Z`K9&Hrmg?fnKm;Bb0j-RaZygFV}t_2VPYJqj~jCyKWjoUG89oI~DzC8`M+rsEhB|SwVht(5rk!hp03wuQPK~g9khcu3IT)_C;pN zsWe`SDR!2o(zE#lo?@Vkc_e7mjv=_S1Jc-4VdiZ{`Yju{A5QUr7q>tZuhs^++pU0Z(Os}vcyub4F zDys5(MtGi&(HtaGhdZD)kLZ=}WA)tPX@)f)<>)R}6lRF&2=pl^VYS}%>6`Yj5f@Az{`difb(>)wPp(u6Z0PLMmCgr8T5E_ij+0^1px z2JDRu=;n9lJ$v(OAS~fk^yCim+5_YG7PBS}wnYbKN_ILo_uw3VViz&sh@Ay}nhyKw z7gkaN>v0&PGZfD8aCnjZc&`pzVL5mL1zrB|NPalKNl>m*Xx6!`L{FznVu(kDp0#@oQMkec6ZdF)x=>88m}s9mn%7!M>4^ zlPwguL?6>KhLw!N6rRW-=0_?VoC}QgMCQ<3#^ebR%i?7+M+6vTQOp-V)$uj{?I0t3 zmY)v0?C+u1vz2GMf+w~Vw(UID;8eQqBZ)n|S?P7)A%(}~OU5A!qZ7l3c!(D%@J>ST zw?44$65X3ZVJTjA<-BM=(#Z|LS!*!QOY^iZil2xDBgAZQZoBhry0YIDgN3{c1@M=Q z#vLa&YhNHjc67=U$(GPz8O7C?CAR#{dI_jo3@bo9xTmX|@TwES^G`S=W|qCmcp}Vx`3bzC4%|~nh)d&U-4YjbPl@nEZ@@%f zM^-TfUQH|1iC!XQI%a8S*3v$@4z*Eyt;0CEE5PyROKHzhZp$!a92c zS1q>~!MQTM;N^{j{riC3^EqSt6^zhT@N9kXK%2wYYR25k$DULZX4pkg&f@d{w!q}N z%Z!-Ld^*f%U8n0BK3!lLzy9RDSHg^T7QBsfQuz6{%W8Bd@$Ion{?{%U^e0Ia?R zthv$bbKks|kU8|m$wh3xD8xyp<-1B((77?n1i8C3 zNqolrB*U8AAZ{dveg=QMNkm%)>!%jo|9ND|lD!xuaVpHL)$AkL*;{@CqsRcGaEMTt zoU4q~L+TFCIzSdM03O?0VILg+DX2LQfa`8zPk#yP>nJPsBOI^v?4keRk2C{TXCY?H zV&Mp<+T37Qe#lOphnTeqM#B-*7yqz+f8vQJ(Gv}`&_^%=@yz8fxX(QB`#KO|CZl#Z z#=iaq^E4BuS?y=WbcJ=%jMy`sD?G+Ln#}jvs4QRcx!2gcPLMZkBHNqnj3&BHg1NJt zUH7z83=YF8aRKbMD_U*O9x|CrGS0V()CFnZD5aVU?%|1kzW{k~sL1f2kkH+yN{gMt;SSGkwLHQzGhIFDyMp)Ll1UoAu zqq>J!c#YbxJK0G+GMQuW6c!MnS`zs$GRB)=XegXe(h^2;A-*o*dY>~!9bj?vbTq1m zt-N-jU%1V4`e;Yfb!$MDJez$i3+s5K_)%V?rFz3&>chPGwUzQ1F&~Wa%xD;{qC;*X zK65iW9jtWb*T!@GGM&?X#$0nL$lf_;3rd9*WHfb!5D{~_uv07xM)()}(~at3rLXM3 z^^)P+{ta8iK^@Q;4oMLw!G34;Mrode%g$7*F_8@x!OpR@1u$6Jgtb}W36F8&)y6c47 zVzd&_Y+s?s^^ubz621M^ic)}nj_dePe!nCKj&pyt&grLvvBCDi=AkKi&}eVXwiBFg zHU@$M z|AGJ6N&CL@62G6pVi4BkZ!lPX=IaTnt4H#B**$Suer1ff(3xPxgZ3Xchr#HX zkE7on>lj$oY#bZA!xufo=`$VW74j50x7X7zFa2wc47?2zO-v-Rm?YpXx@wF-$xtRZy3B0}%zHJ<<0fByW;DTbK_U zZkn@_T&}z@-dT;0Y+P#FK=#zr!M|W^+_4IiRhI^7N)!i5+2ll&x|@`Wpl=_v@i0y! 
zw5xJ0QF0ekTl!FzU&n1Eqy64oYb?~8>L2xH=0|I|gOdhoKrc0OPBN=~@+bLArIJz` zH>h;VI^1m%#o@4FKar*85oh3*)51P!nnqQ)b5+r2f1kGZ7a{wKbIUPL-bqtIq5q^q zvypr%5nkW#?tSoro_rW{VG9vpA9 zv=y?}667j7gfFQsno0TOn)uw6)J|!U_%&=&7s~;mpK}O2qqN--eh3Ua`@UJuD8@Mp zmGMCD;&ee-Yf*vR;Oj7)zqZJVQdQ}OGhBKU>P@8-_>W)1s&wtC?j-Sulq7C)XIVec zZWqo!iTHj)vvmQ7jgwgQl{$)&>Fd{GU{W=NS zK8kZu?&8|B2~Na#r@4I)7t+a2Z$ZGRB&#seZg2YNTn%zl#Y*)5FN;<1Vr|7f+XTFa zAchCqa_awgX-u5}SzNLsvR@fgj8Dlq?pfEZCU!P@w@=}NjdOd$r^*GJbQSySpUjg{ zu)^}TNNFM-5x!`H%yLn@gZ?%i`nIM(l#N=z7)zASo(QR zO5tE%!?yCXoAiWt@Jxu7PD|zFyK*wjV-rC;pwL5Z>&=-$6g}J$fn(a@Vdz4!blkDKdZ?Gyl8IU;s~!BtPbBgZRCl+#MGau>GXA95>cm@vaBX0PUKhI}|jTxA6g zaPBf|vr1*eFNHYrk&{AQ{^vU?)rBy>R)JDaa2C3wg{vyfy%|IbG3%v zMc{n1JcpD|;#~JB$nhY!Xp`-MoXf5Z37@&e?isq)8SzuAN&U7_iNaxS z5a%u(r@MJsOm*@0qdz*I&is2i(m9;j^rVX7z!PxCi}T?U=8~6!R3(tHOYG-kz$Pz| zH^#c(kf|rq#T!Vz--Y_17k_u1?7?x`kt4g*^h3!G$~yb(&9DK&stFwrgNn`zcIl(^ z2&&;lSr^PZmYuo=8P*!-n(L8TsRjLWqL0TuiazFRuXGVIpx>In4jnJ-v~L*CgUbSa zQ*s4frIy!sm`CjFuo?S`yWpy|lCR4p)%2dH-hi*M?}fHUelNV`UPPRtM0>8a!W?6? z)>nmIhYA>b%o_GBXDUyjsa$|H(_N{hOjq*aLA6MYQD4esq$a{Drv-lFTj^_UC8s!J z{{^3|E!kuzXBa1Uic+k+R9+~@%j>1jczhHk{~VjzW?;0d>eXnNUAvg#X0g9ii4QX&G&Q_qHSncZ!{ZB3T)Nt!EP z#QQE;z3myoi8`OWy|s8bQ9!K)=YEM*$y#adHje6_LyJQ(dNbppIo7U(3(g+tGd$dT zbZssvCDkLU2p@O3l0$AH4ssvTOP8z*rb_=l#p;67lt#X`hz`w8SliR&hMdgwA5~sU zX(cu4OVFyN=o{*}#c?Y=WZks7!#n?*U2-y!Z!NXbVk(FtV0^R4D34P+cEY3QFiu>p z$Z;pYAzEY?VwMzVcf-?DSHFAgO!cHty4J1w#RR+s{EUrQ(3KE^sM&P^0)PcwA*qXv7KAO zsc!#nm9mza|KQ;1hGvD%h3@H-&E|G)_``ooZ{Y) zrRP})PnE79q7%U@R5ZeEVIxmNA5sbYtFQZzY~=&1I~=hR?d2fl%|M&lfjMP^ncoPn zK>>xsaVHPzpdn&)CEi;gs&ed!n4A$))otPgDz_YP*3P*LtZGJ6Tw6mads6NNB12P+ zZ|(j(xtZu3a-t>Zp>)$Wcp(sQ29=~`kRQW`uI@Nw2bn;%E_;SVLe?LSzEo5;Q*u$-ZW=pdny$%m7(f&k%gU%@TS9BGx zB?mX63rbJw-P@8P^`T;)M}D2xtpRuB5!m}xxWn6-2bMFAoF*rz)<~+oXXpoxQkh<- z4j4lIHlO)$j+K&3cKi!->M>Pnp+!Vgj2RqTB&wBX z0bX72gjejKErm|@VIzzFbMS27Y|7Zc55b4}WGl((3M0K7{(`M}a{s7VyvKc(?~u2b z_PgAik^J51V0Wj3`!hbx&-GmT793AM>3^DwZCGvM4e2KRl0mT4v!e^m&!`Mh%PV`O z3}SEh9yz^()9Mmj;IqRRYhhOepPU32u9#Ss+~~BN1$9qB%Y82Tltt!q)iPlfwFpY_ull=u|7%SYrd&&ceSF{5_b3t2PGdG%p;_>n&k zAmh6ND{~dx-;B6pCdhG~(vjz5uEmUuc&YlOmt?-3rHkTb_cyDdu{3l$wOZhDN@~Cp z8en{B7jq}!_BL8_QSz{>Ykps8|8-x4_j~oU6cje2`SEj>$uanPt&O4jk6aX%Ib;opA7`FfRt$Q~C2gDxx;%IEoNcK2x0)!1W~x=e2L$Q$iUS+G8VH#%Qqv zBiTu$VpQ~AX&0vxeq}#1YU<5{JpyA=1_$N^%W#rH4Qj&nIH+x;Zt1B^(@uLun3;9G zuhcej1)wpX4kde zm@SzjWx%+Ex+(w$6{E)abVQ>kVE!S=zHYzKpTGto-5!|wCR)>3wyX><)1 z+J3sPHRxQ`MhQ|C1n^g(1KjJLzV%TT;Us#BUF6SkE-ub^;-(_;qc?}If`AEcxY z>`l$4?=*jK@QM&i!w{p=R&r>Sz3Y94eT%)DwT4Q7Zs;JK91k*2bk2%9qigymPD6RF zZ!#0@EA9%hh+IQCN1oDJ-3TAa#|eiK>UFt|v_*K~RIvkQ99*{7hHGRrhnn3j%U> z!%hHq?~X^t*Hpe^QI?Hjr7fmzKW&x3$0*#xo5R|vLA0*Jx~hhaxs6>`adGUg+3?H--{OASXZ1p+q;+FZyaEw#t zK*r=}kTsj=T%KyEBeD0eQ-KPhKZr{LtEVEq0sX`NkiJgQuVdek-By*;39IQ|X(FdG z+Ov(#p?txifki1@11*9F^yStO_X~1vuhLqntISj4vYhRxqyU z9%F^^4XlAJ;0^WRx&0-TQ);X4)tp*Tovfz7c#M!wh~EmIaVB14T{Gk1qs=qxv4*~& z0@_T4R)CzMle7*+@IW{mWnl8);6n9L&_0Vx^h)cP^`4GceRiPZb|i|^MowNZ$SW{i zLuC9HoQ~jy_|=PhF4 zs&rThse|!Ry{cAIFH!xZpgN0|jtHgQv3AfrXh_C8y*K=nWb=@<(!PvqdS{$ht9vSY zn|XSu>*O=w4{M}qoN74Neaudm2bNPUxW~U*1+xRirIpoIs9DtO_V8d`w_L2ws>4|Hcu z1dZk9JZp!&9Sooms^i>r^IwR48IyfdX2~Pobx*=hXl`f24gNWk$O$Dk-nylC3|&ff z1EW%R2Y-USdEPeML7d8zN`3c@@>=Pw-Ql#l(cXWx+)4+rk$V{~E-YD2DBXdQBne(q zN$#?<_1fOb$yU|mL5dH)>rG`dPU_F7l{+hK~lZ9JO694y2_3AeF&#T6p?$V z$Gp=bt3-bnxz|%s-UIgO2uI1NL8r{IdYhmXD4g0Um|IUW{)C@95JlZq9A36bUE~Ng zsIByt^TiQYqTxp`q#8XA)*7Ny5O13Bdb7iT%WICZF4&pj71e>AG#+Mef4PuiD9hBN zYE8AMvR%p~UZiV2hN|rznfF*ch|^p3L5Ux_)j;vzFbf(`W0aEnz>`V9P1Hd9+Z5-K znkbhCxG$(`df>^>22RHbry>pl52=$^kmI+dgE<=9|5v&LSE&u3(-F)ApW!U^QXjgg 
z=fPQ~knLUq4;q0Ie>UjD}_=WOw90)x+8)Ed`$Ic>1^_rKND(CQCnZ0$*Euj(L%& ze~`1oZjl`%!vDAfYkeRXWi)loE2#rcm0>SWvJaeeP3eI&81~m_p^f_^eeY9l9ensI zp*yXI9JZ%j(*2FxtUB7}OVqXaE0JqflUhl8a3LCl-na>FO!4%cO3~*^PcHcnDD^`- z6Npe@#{er`Km|Sv{ckO}47cfj&%jym2^e=idLmsw9Inxgh++hLh$obMz7Y}M_!nvW zrMq}X&zFy>Uuol%kWj`ZOAj5_Uz^*Ro%^kTaTBaRH}1IeoG;P!Z^L=efy0}~j5#Z3 z0xy{ZUv!*Y3=Q%VFphsfUJts8`w)a zblwU&Mc_0xu`fB3SZ8HWl5eGNJC~EsR!MR4PsGDrc+70Xt1gRp2hMC4`d(Y;2+txL zaBPVRz5zX*Z^7EL(&hdUe#KkPeVK-)s;3yky6p#FW{Q-I8nv@DMf?+dx-E)|C)}+c zmEsz_5`SQ1%7B-Yb+6I88wgwDE1ttOdR23Am#IOJ_e zz1Rbni3ePdQ=f=E&v^xp)Gr)%%G!NVE=W!j*b7!3-YL3EIjv41q6@l;(|NQbCP{OVSV& zA{8d|T%K2uc^H8QSaq0RM?mx2;1#l&&-ok0ZEJGg+A!A^)4Lo-*X0eUok4$iAWtZc zuY2jAo4ohoG#e4X^OOyyTt2KYNpzc9s7BdB*G! zpYEp*nvLttj^|AtbRX4-^?j!?%@9}r zk4zmkjU!{?|znFr_#PVOAq~DM(Hk}|CH-^$MqT9F}X4C5RIEw4)nRXdF7&8 zUl2ZHNk*+SpB4V3@aL6C`%^)FA}d#$9(8vNTuple[int, np.ndarray]: + """load wav file. + + Args: + wavpath (str): wav path. + sr (int, optional): expect sample rate. Defaults to None. + dtype (str, optional): wav data bits. Defaults to 'int16'. + + Returns: + Tuple[int, np.ndarray]: sr (int), wav (int16) [T, C]. + """ + wav, r_sr = sf.read(wavpath, start=start, stop=stop, dtype=dtype, always_2d=always_2d) + if sr: + assert sr == r_sr + return r_sr, wav + + +def write(wavpath:str, wav:np.ndarray, sr:int, dtype='PCM_16'): + """write wav file. + + Args: + wavpath (str): file path to save. + wav (np.ndarray): wav data. + sr (int): data samplerate. + dtype (str, optional): wav bit format. Defaults to 'PCM_16'. + """ + sf.write(wavpath, wav, sr, subtype=dtype) + + +def frames(x: Tensor, + num_samples: Tensor, + sr: int, + win_length: float, + stride_length: float, + clip: bool = False) -> Tuple[Tensor, Tensor]: + """Extract frames from audio. + + Parameters + ---------- + x : Tensor + Shape (B, T), batched waveform. + num_samples : Tensor + Shape (B, ), number of samples of each waveform. + sr: int + Sampling Rate. + win_length : float + Window length in ms. + stride_length : float + Stride length in ms. + clip : bool, optional + Whether to clip audio that does not fit into the last frame, by + default True + + Returns + ------- + frames : Tensor + Shape (B, T', win_length). + num_frames : Tensor + Shape (B, ) number of valid frames + """ + assert stride_length <= win_length + stride_length = int(stride_length * sr) + win_length = int(win_length * sr) + + num_frames = (num_samples - win_length) // stride_length + padding = (0, 0) + if not clip: + num_frames += 1 + need_samples = num_frames * stride_length + win_length + padding = (0, need_samples - num_samples - 1) + + weight = paddle.eye(win_length).unsqueeze(1) #[win_length, 1, win_length] + + frames = F.conv1d(x.unsqueeze(-1), + weight, + padding=padding, + stride=(stride_length, ), + data_format='NLC') + return frames, num_frames + + +def dither(signal:Tensor, dither_value=1.0)->Tensor: + """dither frames for log compute. + + Args: + signal (Tensor): [B, T, D] + dither_value (float, optional): [scalar]. Defaults to 1.0. + + Returns: + Tensor: [B, T, D] + """ + D = paddle.shape(signal)[-1] + signal += paddle.normal(shape=[1, 1, D]) * dither_value + return signal + + +def remove_dc_offset(signal:Tensor)->Tensor: + """remove dc. + + Args: + signal (Tensor): [B, T, D] + + Returns: + Tensor: [B, T, D] + """ + signal -= paddle.mean(signal, axis=-1, keepdim=True) + return signal + +def preemphasis(signal:Tensor, coeff=0.97)->Tensor: + """perform preemphasis on the input signal. + + Args: + signal (Tensor): [B, T, D], The signal to filter. 
+        coeff (float, optional): [scalar]. The preemphasis coefficient. 0 is no filter. Defaults to 0.97.
+
+    Returns:
+        Tensor: [B, T, D]
+    """
+    return paddle.concat([
+        (1-coeff)*signal[:, :, 0:1],
+        signal[:, :, 1:] - coeff * signal[:, :, :-1]
+    ], axis=-1)
+
+
+class STFT(nn.Layer):
+    """A module for computing stft transformation in a differentiable way.
+
+    http://practicalcryptography.com/miscellaneous/machine-learning/intuitive-guide-discrete-fourier-transform/
+
+    Parameters
+    ------------
+    n_fft : int
+        Number of samples in a frame.
+
+    sr: int
+        Sampling rate.
+
+    stride_length : float
+        Stride between adjacent frames, in seconds.
+
+    win_length : float
+        Window length in seconds.
+
+    clip: bool
+        Whether to clip audio that does not fit into the last frame.
+    """
+    def __init__(self,
+                 n_fft: int,
+                 sr: int,
+                 win_length: float,
+                 stride_length: float,
+                 dither:float=0.0,
+                 preemph_coeff:float=0.97,
+                 remove_dc_offset:bool=True,
+                 window_type: str = 'povey',
+                 clip: bool = False):
+        super().__init__()
+        self.sr = sr
+        self.win_length = win_length
+        self.stride_length = stride_length
+        self.dither = dither
+        self.preemph_coeff = preemph_coeff
+        self.remove_dc_offset = remove_dc_offset
+        self.window_type = window_type
+        self.clip = clip
+
+        self.n_fft = n_fft
+        self.n_bin = 1 + n_fft // 2
+
+        w_real, w_imag, kernel_size = dft_matrix(
+            self.n_fft, int(self.win_length * self.sr), self.n_bin
+        )
+
+        # calculate window
+        window = get_window(window_type, kernel_size)
+
+        # (2 * n_bins, kernel_size)
+        w = np.concatenate([w_real, w_imag], axis=0)
+        w = w * window
+        # (kernel_size, 2 * n_bins)
+        w = np.transpose(w)
+        weight = paddle.cast(paddle.to_tensor(w), paddle.get_default_dtype())
+        self.register_buffer("weight", weight)
+
+    def forward(self, x: Tensor, num_samples: Tensor) -> Tuple[Tensor, Tensor]:
+        """Compute the stft transform.
+        Parameters
+        ------------
+        x : Tensor [shape=(B, T)]
+            The input waveform.
+        num_samples : Tensor [shape=(B,)]
+            Number of samples of each waveform.
+        Returns
+        ------------
+        C : Tensor
+            Shape(B, T', n_bins, 2) Spectrogram.
+
+        num_frames: Tensor
+            Shape (B,) number of valid frames of each spectrogram
+        """
+        batch_size = paddle.shape(num_samples)
+        F, nframe = frames(x, num_samples, self.sr, self.win_length, self.stride_length, clip=self.clip)
+        if self.dither:
+            F = dither(F, self.dither)
+        if self.remove_dc_offset:
+            F = remove_dc_offset(F)
+        if self.preemph_coeff:
+            F = preemphasis(F, self.preemph_coeff)
+        C = paddle.matmul(F, self.weight) # [B, T, K] [K, 2 * n_bins]
+        C = paddle.reshape(C, [batch_size, -1, 2, self.n_bin])
+        C = C.transpose([0, 1, 3, 2])
+        return C, nframe
+
+
+def powspec(C:Tensor) -> Tensor:
+    """Compute the power spectrum |X_k|^2.
+
+    Args:
+        C (Tensor): [B, T, C, 2]
+
+    Returns:
+        Tensor: [B, T, C]
+    """
+    real, imag = paddle.chunk(C, 2, axis=-1)
+    return paddle.square(real.squeeze(-1)) + paddle.square(imag.squeeze(-1))
+
+
+def magspec(C: Tensor, eps=1e-10) -> Tensor:
+    """Compute the magnitude spectrum |X_k|.
+
+    Args:
+        C (Tensor): [B, T, C, 2]
+        eps (float): epsilon.
+
+    Returns:
+        Tensor: [B, T, C]
+    """
+    pspec = powspec(C)
+    return paddle.sqrt(pspec + eps)
+
+
+def logspec(C: Tensor, eps=1e-10) -> Tensor:
+    """Compute the log magnitude spectrum 20*log10|X_k|.
+
+    Args:
+        C (Tensor): [B, T, C, 2]
+        eps (float, optional): epsilon. Defaults to 1e-10.
+
+    Returns:
+        Tensor: [B, T, C]
+    """
+    spec = magspec(C)
+    return 20 * paddle.log10(spec + eps)
+
diff --git a/third_party/paddle_audio/frontend/kaldi_test.py b/third_party/paddle_audio/frontend/kaldi_test.py
new file mode 100644
index 000000000..34ff413c5
--- /dev/null
+++ b/third_party/paddle_audio/frontend/kaldi_test.py
@@ -0,0 +1,533 @@
+from typing import Tuple
+import numpy as np
+import paddle
+import unittest
+
+import decimal
+import numpy
+import math
+import logging
+from pathlib import Path
+
+from scipy.fftpack import dct
+
+from third_party.paddle_audio.frontend import kaldi
+
+def round_half_up(number):
+    return int(decimal.Decimal(number).quantize(decimal.Decimal('1'), rounding=decimal.ROUND_HALF_UP))
+
+def rolling_window(a, window, step=1):
+    # http://ellisvalentiner.com/post/2017-03-21-np-strides-trick
+    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
+    strides = a.strides + (a.strides[-1],)
+    return numpy.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)[::step]
+
+
+def do_dither(signal, dither_value=1.0):
+    signal += numpy.random.normal(size=signal.shape) * dither_value
+    return signal
+
+def do_remove_dc_offset(signal):
+    signal -= numpy.mean(signal)
+    return signal
+
+def do_preemphasis(signal, coeff=0.97):
+    """perform preemphasis on the input signal.
+
+    :param signal: The signal to filter.
+    :param coeff: The preemphasis coefficient. 0 is no filter, default is 0.97.
+    :returns: the filtered signal.
+    """
+    return numpy.append((1-coeff)*signal[0], signal[1:] - coeff * signal[:-1])
+
+
+def framesig(sig, frame_len, frame_step, dither=1.0, preemph=0.97, remove_dc_offset=True, wintype='hamming', stride_trick=True):
+    """Frame a signal into overlapping frames.
+
+    :param sig: the audio signal to frame.
+    :param frame_len: length of each frame measured in samples.
+    :param frame_step: number of samples after the start of the previous frame that the next frame should begin.
+    :param wintype: the window type to apply to each frame, e.g. 'povey' or 'hamming'.
+    :param stride_trick: use stride trick to compute the rolling window and window multiplication faster
+    :returns: an array of frames. Size is NUMFRAMES by frame_len.
+ """ + slen = len(sig) + frame_len = int(round_half_up(frame_len)) + frame_step = int(round_half_up(frame_step)) + if slen <= frame_len: + numframes = 1 + else: + numframes = 1 + (( slen - frame_len) // frame_step) + + # check kaldi/src/feat/feature-window.h + padsignal = sig[:(numframes-1)*frame_step+frame_len] + if wintype is 'povey': + win = numpy.empty(frame_len) + for i in range(frame_len): + win[i] = (0.5-0.5*numpy.cos(2*numpy.pi/(frame_len-1)*i))**0.85 + else: # the hamming window + win = numpy.hamming(frame_len) + + if stride_trick: + frames = rolling_window(padsignal, window=frame_len, step=frame_step) + else: + indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile( + numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T + indices = numpy.array(indices, dtype=numpy.int32) + frames = padsignal[indices] + win = numpy.tile(win, (numframes, 1)) + + frames = frames.astype(numpy.float32) + raw_frames = numpy.zeros(frames.shape) + for frm in range(frames.shape[0]): + frames[frm,:] = do_dither(frames[frm,:], dither) # dither + frames[frm,:] = do_remove_dc_offset(frames[frm,:]) # remove dc offset + raw_frames[frm,:] = frames[frm,:] + frames[frm,:] = do_preemphasis(frames[frm,:], preemph) # preemphasize + + return frames * win, raw_frames + + +def magspec(frames, NFFT): + """Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). + + :param frames: the array of frames. Each row is a frame. + :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. + :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the magnitude spectrum of the corresponding frame. + """ + if numpy.shape(frames)[1] > NFFT: + logging.warn( + 'frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.', + numpy.shape(frames)[1], NFFT) + complex_spec = numpy.fft.rfft(frames, NFFT) + return numpy.absolute(complex_spec) + + +def powspec(frames, NFFT): + """Compute the power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). + + :param frames: the array of frames. Each row is a frame. + :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. + :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the power spectrum of the corresponding frame. + """ + return numpy.square(magspec(frames, NFFT)) + + + +def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13, + nfilt=23,nfft=512,lowfreq=20,highfreq=None,dither=1.0,remove_dc_offset=True,preemph=0.97, + ceplifter=22,useEnergy=True,wintype='povey'): + """Compute MFCC features from an audio signal. + + :param signal: the audio signal from which to compute features. Should be an N*1 array + :param samplerate: the samplerate of the signal we are working with. + :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds) + :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds) + :param numcep: the number of cepstrum to return, default 13 + :param nfilt: the number of filters in the filterbank, default 26. + :param nfft: the FFT size. Default is 512. + :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. + :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2 + :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
+    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
+    :param useEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
+    :param wintype: the analysis window type to apply to each frame, default 'povey'.
+    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
+    """
+    feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,dither,remove_dc_offset,preemph,wintype)
+    feat = numpy.log(feat)
+    feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
+    feat = lifter(feat,ceplifter)
+    if useEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
+    return feat
+
+def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
+          nfilt=40,nfft=512,lowfreq=0,highfreq=None,dither=1.0,remove_dc_offset=True, preemph=0.97,
+          wintype='hamming'):
+    """Compute Mel-filterbank energy features from an audio signal.
+
+    :param signal: the audio signal from which to compute features. Should be an N*1 array
+    :param samplerate: the samplerate of the signal we are working with.
+    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
+    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
+    :param nfilt: the number of filters in the filterbank, default 40.
+    :param nfft: the FFT size. Default is 512.
+    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
+    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
+    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
+    :param wintype: the analysis window type to apply to each frame, default 'hamming'.
+    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
+        second return value is the energy in each frame (total energy, unwindowed)
+    """
+    highfreq= highfreq or samplerate/2
+    frames,raw_frames = framesig(signal, winlen*samplerate, winstep*samplerate, dither, preemph, remove_dc_offset, wintype)
+    pspec = powspec(frames,nfft) # nearly the same until this part
+    energy = numpy.sum(raw_frames**2,1) # this stores the raw energy in each frame
+    energy = numpy.where(energy == 0,numpy.finfo(float).eps,energy) # if energy is zero, we get problems with log
+
+    fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
+    feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
+    feat = numpy.where(feat == 0,numpy.finfo(float).eps,feat) # if feat is zero, we get problems with log
+
+    return feat,energy
+
+def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
+             nfilt=40,nfft=512,lowfreq=64,highfreq=None,dither=1.0,remove_dc_offset=True,preemph=0.97,wintype='hamming'):
+    """Compute log Mel-filterbank energy features from an audio signal.
+
+    :param signal: the audio signal from which to compute features. Should be an N*1 array
+    :param samplerate: the samplerate of the signal we are working with.
+    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
+    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
+    :param nfilt: the number of filters in the filterbank, default 40.
+    :param nfft: the FFT size. Default is 512.
+    :param lowfreq: lowest band edge of mel filters. In Hz, default is 64.
+    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
+    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
+    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
+    """
+    feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,dither, remove_dc_offset,preemph,wintype)
+    return numpy.log(feat)
+
+def hz2mel(hz):
+    """Convert a value in Hertz to Mels
+
+    :param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
+    :returns: a value in Mels. If an array was passed in, an identical sized array is returned.
+    """
+    return 1127 * numpy.log(1+hz/700.0)
+
+def mel2hz(mel):
+    """Convert a value in Mels to Hertz
+
+    :param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise.
+    :returns: a value in Hertz. If an array was passed in, an identical sized array is returned.
+    """
+    return 700 * (numpy.exp(mel/1127.0)-1)
+
+def get_filterbanks(nfilt=26,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
+    """Compute a Mel-filterbank. The filters are stored in the rows, the columns correspond
+    to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1)
+
+    :param nfilt: the number of filters in the filterbank, default 26.
+    :param nfft: the FFT size. Default is 512.
+    :param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
+    :param lowfreq: lowest band edge of mel filters, default 0 Hz
+    :param highfreq: highest band edge of mel filters, default samplerate/2
+    :returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
+    """
+    highfreq= highfreq or samplerate/2
+    assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2"
+
+    # compute points evenly spaced in mels
+    lowmel = hz2mel(lowfreq)
+    highmel = hz2mel(highfreq)
+
+    # check kaldi/src/feat/Mel-computations.h
+    fbank = numpy.zeros([nfilt,nfft//2+1])
+    mel_freq_delta = (highmel-lowmel)/(nfilt+1)
+    for j in range(0,nfilt):
+        leftmel = lowmel+j*mel_freq_delta
+        centermel = lowmel+(j+1)*mel_freq_delta
+        rightmel = lowmel+(j+2)*mel_freq_delta
+        for i in range(0,nfft//2):
+            mel = hz2mel(i*samplerate/nfft)
+            if mel > leftmel and mel < rightmel:
+                if mel < centermel:
+                    fbank[j,i] = (mel-leftmel)/(centermel-leftmel)
+                else:
+                    fbank[j,i] = (rightmel-mel)/(rightmel-centermel)
+    return fbank
+
+def lifter(cepstra, L=22):
+    """Apply a cepstral lifter to the matrix of cepstra. This increases the
+    magnitude of the high-frequency DCT coefficients.
+
+    :param cepstra: the matrix of mel-cepstra, of size numframes * numcep.
+    :param L: the liftering coefficient to use. Default is 22. L <= 0 disables the lifter.
+    """
+    if L > 0:
+        nframes,ncoeff = numpy.shape(cepstra)
+        n = numpy.arange(ncoeff)
+        lift = 1 + (L/2.)*numpy.sin(numpy.pi*n/L)
+        return lift*cepstra
+    else:
+        # values of L <= 0, do nothing
+        return cepstra
+
+def delta(feat, N):
+    """Compute delta features from a feature vector sequence.
+
+    :param feat: A numpy array of size (NUMFRAMES by number of features) containing features. Each row holds 1 feature vector.
+    :param N: For each frame, calculate delta features based on preceding and following N frames
+    :returns: A numpy array of size (NUMFRAMES by number of features) containing delta features. Each row holds 1 delta feature vector.
+ """ + if N < 1: + raise ValueError('N must be an integer >= 1') + NUMFRAMES = len(feat) + denominator = 2 * sum([i**2 for i in range(1, N+1)]) + delta_feat = numpy.empty_like(feat) + padded = numpy.pad(feat, ((N, N), (0, 0)), mode='edge') # padded version of feat + for t in range(NUMFRAMES): + delta_feat[t] = numpy.dot(numpy.arange(-N, N+1), padded[t : t+2*N+1]) / denominator # [t : t+2*N+1] == [(N+t)-N : (N+t)+N+1] + return delta_feat + +##### modify for test ###### + +def framesig_without_dither_dc_preemphasize(sig, frame_len, frame_step, wintype='hamming', stride_trick=True): + """Frame a signal into overlapping frames. + + :param sig: the audio signal to frame. + :param frame_len: length of each frame measured in samples. + :param frame_step: number of samples after the start of the previous frame that the next frame should begin. + :param winfunc: the analysis window to apply to each frame. By default no window is applied. + :param stride_trick: use stride trick to compute the rolling window and window multiplication faster + :returns: an array of frames. Size is NUMFRAMES by frame_len. + """ + slen = len(sig) + frame_len = int(round_half_up(frame_len)) + frame_step = int(round_half_up(frame_step)) + if slen <= frame_len: + numframes = 1 + else: + numframes = 1 + (( slen - frame_len) // frame_step) + + # check kaldi/src/feat/feature-window.h + padsignal = sig[:(numframes-1)*frame_step+frame_len] + + if wintype is 'povey': + win = numpy.empty(frame_len) + for i in range(frame_len): + win[i] = (0.5-0.5*numpy.cos(2*numpy.pi/(frame_len-1)*i))**0.85 + elif wintype == '': + win = numpy.ones(frame_len) + elif wintype == 'hann': + win = numpy.hanning(frame_len) + else: # the hamming window + win = numpy.hamming(frame_len) + + if stride_trick: + frames = rolling_window(padsignal, window=frame_len, step=frame_step) + else: + indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile( + numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T + indices = numpy.array(indices, dtype=numpy.int32) + frames = padsignal[indices] + win = numpy.tile(win, (numframes, 1)) + + frames = frames.astype(numpy.float32) + raw_frames = frames + return frames * win, raw_frames + + +def frames(signal,samplerate=16000,winlen=0.025,winstep=0.01, + nfilt=40,nfft=512,lowfreq=0,highfreq=None, wintype='hamming'): + frames_with_win, raw_frames = framesig_without_dither_dc_preemphasize(signal, winlen*samplerate, winstep*samplerate, wintype) + return frames_with_win, raw_frames + + +def complexspec(frames, NFFT): + """Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). + + :param frames: the array of frames. Each row is a frame. + :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. + :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the magnitude spectrum of the corresponding frame. + """ + if numpy.shape(frames)[1] > NFFT: + logging.warn( + 'frame length (%d) is greater than FFT size (%d), frame will be truncated. 
Increase NFFT to avoid.', + numpy.shape(frames)[1], NFFT) + complex_spec = numpy.fft.rfft(frames, NFFT) + return complex_spec + + +def stft_with_window(signal,samplerate=16000,winlen=0.025,winstep=0.01, + nfilt=40,nfft=512,lowfreq=0,highfreq=None,dither=1.0,remove_dc_offset=True, preemph=0.97, + wintype='hamming'): + frames_with_win, raw_frames = framesig_without_dither_dc_preemphasize(signal, winlen*samplerate, winstep*samplerate, wintype) + + spec = magspec(frames_with_win, nfft) # nearly the same until this part + scomplex = complexspec(frames_with_win, nfft) + + rspec = magspec(raw_frames, nfft) + rcomplex = complexspec(raw_frames, nfft) + return spec, scomplex, rspec, rcomplex + + +class TestKaldiFE(unittest.TestCase): + def setUp(self): + self. this_dir = Path(__file__).parent + + self.wavpath = str(self.this_dir / 'english.wav') + self.winlen=0.025 # ms + self.winstep=0.01 # ms + self.nfft=512 + self.lowfreq = 0 + self.highfreq = None + self.wintype='hamm' + self.nfilt=40 + + paddle.set_device('cpu') + + + def test_read(self): + import scipy.io.wavfile as wav + rate, sig = wav.read(self.wavpath) + sr, wav = kaldi.read(self.wavpath) + wav = wav[:, 0] + self.assertTrue(np.all(sig == wav)) + self.assertEqual(rate, sr) + + def test_frames(self): + sr, wav = kaldi.read(self.wavpath) + wav = wav[:, 0] + _, fs = frames(wav, samplerate=sr, + winlen=self.winlen, winstep=self.winstep, + nfilt=self.nfilt, nfft=self.nfft, + lowfreq=self.lowfreq, highfreq=self.highfreq, + wintype=self.wintype) + + t_wav = paddle.to_tensor([wav], dtype='float32') + t_wavlen = paddle.to_tensor([len(wav)]) + t_fs, t_nframe = kaldi.frames(t_wav, t_wavlen, sr, self.winlen, self.winstep, clip=False) + t_fs = t_fs.astype(fs.dtype)[0] + + self.assertEqual(t_nframe.item(), fs.shape[0]) + self.assertTrue(np.allclose(t_fs.numpy(), fs)) + + + def test_stft(self): + sr, wav = kaldi.read(self.wavpath) + wav = wav[:, 0] + + for wintype in ['', 'hamm', 'hann', 'povey']: + self.wintype=wintype + _, stft_c_win, _, _ = stft_with_window(wav, samplerate=sr, + winlen=self.winlen, winstep=self.winstep, + nfilt=self.nfilt, nfft=self.nfft, + lowfreq=self.lowfreq, highfreq=self.highfreq, + wintype=self.wintype) + + t_wav = paddle.to_tensor([wav], dtype='float32') + t_wavlen = paddle.to_tensor([len(wav)]) + + stft_class = kaldi.STFT(self.nfft, sr, self.winlen, self.winstep, window_type=self.wintype, dither=0.0, preemph_coeff=0.0, remove_dc_offset=False, clip=False) + t_stft, t_nframe = stft_class(t_wav, t_wavlen) + t_stft = t_stft.astype(stft_c_win.real.dtype)[0] + t_real = t_stft[:, :, 0] + t_imag = t_stft[:, :, 1] + + self.assertEqual(t_nframe.item(), stft_c_win.real.shape[0]) + + self.assertLess(np.sum(t_real.numpy()) - np.sum(stft_c_win.real), 1) + self.assertTrue(np.allclose(t_real.numpy(), stft_c_win.real, atol=1e-1)) + + self.assertLess(np.sum(t_imag.numpy()) - np.sum(stft_c_win.imag), 1) + self.assertTrue(np.allclose(t_imag.numpy(), stft_c_win.imag, atol=1e-1)) + + + def test_magspec(self): + sr, wav = kaldi.read(self.wavpath) + wav = wav[:, 0] + for wintype in ['', 'hamm', 'hann', 'povey']: + self.wintype=wintype + stft_win, _, _, _ = stft_with_window(wav, samplerate=sr, + winlen=self.winlen, winstep=self.winstep, + nfilt=self.nfilt, nfft=self.nfft, + lowfreq=self.lowfreq, highfreq=self.highfreq, + wintype=self.wintype) + + t_wav = paddle.to_tensor([wav], dtype='float32') + t_wavlen = paddle.to_tensor([len(wav)]) + + stft_class = kaldi.STFT(self.nfft, sr, self.winlen, self.winstep, window_type=self.wintype, dither=0.0, 
+
+    def test_magspec_winprocess(self):
+        sr, wav = kaldi.read(self.wavpath)
+        wav = wav[:, 0]
+        fs, _ = framesig(wav, self.winlen*sr, self.winstep*sr,
+                         dither=0.0, preemph=0.97, remove_dc_offset=True, wintype='povey', stride_trick=True)
+        spec = magspec(fs, self.nfft)  # nearly the same until this part
+
+        t_wav = paddle.to_tensor([wav], dtype='float32')
+        t_wavlen = paddle.to_tensor([len(wav)])
+        stft_class = kaldi.STFT(
+            self.nfft, sr, self.winlen, self.winstep,
+            window_type='povey', dither=0.0, preemph_coeff=0.97, remove_dc_offset=True, clip=False)
+        t_stft, t_nframe = stft_class(t_wav, t_wavlen)
+        t_stft = t_stft.astype(spec.dtype)
+        t_spec = kaldi.magspec(t_stft)[0]
+
+        self.assertEqual(t_nframe.item(), fs.shape[0])
+
+        self.assertLess(np.sum(t_spec.numpy()) - np.sum(spec), 1)
+        self.assertTrue(np.allclose(t_spec.numpy(), spec, atol=1e-1))
+
+
+    def test_powspec(self):
+        sr, wav = kaldi.read(self.wavpath)
+        wav = wav[:, 0]
+        for wintype in ['', 'hamm', 'hann', 'povey']:
+            self.wintype = wintype
+            stft_win, _, _, _ = stft_with_window(wav, samplerate=sr,
+                winlen=self.winlen, winstep=self.winstep,
+                nfilt=self.nfilt, nfft=self.nfft,
+                lowfreq=self.lowfreq, highfreq=self.highfreq,
+                wintype=self.wintype)
+            stft_win = np.square(stft_win)
+
+            t_wav = paddle.to_tensor([wav], dtype='float32')
+            t_wavlen = paddle.to_tensor([len(wav)])
+
+            stft_class = kaldi.STFT(self.nfft, sr, self.winlen, self.winstep, window_type=self.wintype, dither=0.0, preemph_coeff=0.0, remove_dc_offset=False, clip=False)
+            t_stft, t_nframe = stft_class(t_wav, t_wavlen)
+            t_stft = t_stft.astype(stft_win.dtype)
+            t_spec = kaldi.powspec(t_stft)[0]
+
+            self.assertEqual(t_nframe.item(), stft_win.shape[0])
+
+            self.assertLess(np.sum(t_spec.numpy() - stft_win), 5e4)
+            self.assertTrue(np.allclose(t_spec.numpy(), stft_win, atol=1e2))
+
+
+# from python_speech_features import mfcc
+# from python_speech_features import delta
+# from python_speech_features import logfbank
+# import scipy.io.wavfile as wav
+
+# (rate,sig) = wav.read("english.wav")
+
+# # note that generally nfilt=40 is used for speech recognition
+# fbank_feat = logfbank(sig,nfilt=23,lowfreq=20,dither=0,wintype='povey')
+
+# # the computed fbank coefficients of english.wav with dimension [110,23]
+# # [ 12.2865 12.6906 13.1765 15.714 16.064 15.7553 16.5746 16.9205 16.6472 16.1302 16.4576 16.7326 16.8864 17.7215 18.88 19.1377 19.1495 18.6683 18.3886 20.3506 20.2772 18.8248 18.1899
+# # 11.9198 13.146 14.7215 15.8642 17.4288 16.394 16.8238 16.1095 16.4297 16.6331 16.3163 16.5093 17.4981 18.3429 19.6555 19.6263 19.8435 19.0534 19.001 20.0287 19.7707 19.5852 19.1112
+# # ...
+# # ...
+# # the same with that using kaldi commands: compute-fbank-feats --dither=0.0
+
+
+# mfcc_feat = mfcc(sig,dither=0,useEnergy=True,wintype='povey')
+
+# # the computed mfcc coefficients of english.wav with dimension [110,13]
+# # [ 17.1337 -23.3651 -7.41751 -7.73686 -21.3682 -8.93884 -3.70843 4.68346 -16.0676 12.782 -7.24054 8.25089 10.7292
+# # 17.1692 -23.3028 -5.61872 -4.0075 -23.287 -20.6101 -5.51584 -6.15273 -14.4333 8.13052 -0.0345329 2.06274 -0.564298
+# # ...
+# # ...
+# # the same with that using kaldi commands: compute-mfcc-feats --dither=0.0 + + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From cd001d5daf9d68d7105742b4bfc806eea11e166d Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 07:43:41 +0000 Subject: [PATCH 12/14] change type_as to astype --- deepspeech/modules/encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepspeech/modules/encoder.py b/deepspeech/modules/encoder.py index fb44fe295..fc1ff3c83 100644 --- a/deepspeech/modules/encoder.py +++ b/deepspeech/modules/encoder.py @@ -159,7 +159,7 @@ class BaseEncoder(nn.Layer): if self.global_cmvn is not None: xs = self.global_cmvn(xs) #TODO(Hui Zhang): self.embed(xs, masks, offset=0), stride_slice not support bool tensor - xs, pos_emb, masks = self.embed(xs, masks.type_as(xs), offset=0) + xs, pos_emb, masks = self.embed(xs, masks.astype(xs.dtype), offset=0) #TODO(Hui Zhang): remove mask.astype, stride_slice not support bool tensor masks = masks.astype(paddle.bool) mask_pad = ~masks From 000183ea496aa054cdac11ac86186d36cd27c8df Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 08:16:04 +0000 Subject: [PATCH 13/14] tensor.size to tensor.shape --- deepspeech/exps/u2/model.py | 2 +- deepspeech/exps/u2_kaldi/model.py | 2 +- deepspeech/exps/u2_st/model.py | 2 +- deepspeech/models/u2/u2.py | 24 ++++++++++++------------ deepspeech/models/u2_st.py | 14 +++++++------- deepspeech/modules/attention.py | 15 ++++++++------- deepspeech/modules/decoder.py | 2 +- deepspeech/modules/embedding.py | 4 ++-- deepspeech/modules/encoder.py | 20 ++++++++++---------- deepspeech/utils/ctc_utils.py | 12 ++++++------ deepspeech/utils/tensor_utils.py | 10 +++++----- 11 files changed, 54 insertions(+), 53 deletions(-) diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index 2e512ef1e..7095ed749 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -579,7 +579,7 @@ class U2Tester(U2Trainer): # 1. Encoder encoder_out, encoder_mask = self.model._forward_encoder( feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) + maxlen = encoder_out.shape[1] ctc_probs = self.model.ctc.log_softmax( encoder_out) # (1, maxlen, vocab_size) diff --git a/deepspeech/exps/u2_kaldi/model.py b/deepspeech/exps/u2_kaldi/model.py index edcc34012..c39bfe31d 100644 --- a/deepspeech/exps/u2_kaldi/model.py +++ b/deepspeech/exps/u2_kaldi/model.py @@ -557,7 +557,7 @@ class U2Tester(U2Trainer): # 1. Encoder encoder_out, encoder_mask = self.model._forward_encoder( feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) + maxlen = encoder_out.shape[1] ctc_probs = self.model.ctc.log_softmax( encoder_out) # (1, maxlen, vocab_size) diff --git a/deepspeech/exps/u2_st/model.py b/deepspeech/exps/u2_st/model.py index 0fa8ed735..6c6e5243a 100644 --- a/deepspeech/exps/u2_st/model.py +++ b/deepspeech/exps/u2_st/model.py @@ -588,7 +588,7 @@ class U2STTester(U2STTrainer): # 1. 
Encoder encoder_out, encoder_mask = self.model._forward_encoder( feat, feats_length) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) + maxlen = encoder_out.shape[1] ctc_probs = self.model.ctc.log_softmax( encoder_out) # (1, maxlen, vocab_size) diff --git a/deepspeech/models/u2/u2.py b/deepspeech/models/u2/u2.py index 39ed9d5d1..46bbd102f 100644 --- a/deepspeech/models/u2/u2.py +++ b/deepspeech/models/u2/u2.py @@ -298,8 +298,8 @@ class U2BaseModel(nn.Layer): speech, speech_lengths, decoding_chunk_size, num_decoding_left_chunks, simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) + maxlen = encoder_out.shape[1] + encoder_dim = encoder_out.shape[2] running_size = batch_size * beam_size encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) @@ -404,7 +404,7 @@ class U2BaseModel(nn.Layer): encoder_out, encoder_mask = self._forward_encoder( speech, speech_lengths, decoding_chunk_size, num_decoding_left_chunks, simulate_streaming) - maxlen = encoder_out.size(1) + maxlen = encoder_out.shape[1] encoder_out_lens = encoder_mask.squeeze(1).sum(1) ctc_probs = self.ctc.log_softmax(encoder_out) # (B, maxlen, vocab_size) @@ -455,7 +455,7 @@ class U2BaseModel(nn.Layer): speech, speech_lengths, decoding_chunk_size, num_decoding_left_chunks, simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) + maxlen = encoder_out.shape[1] ctc_probs = self.ctc.log_softmax(encoder_out) # (1, maxlen, vocab_size) ctc_probs = ctc_probs.squeeze(0) @@ -583,7 +583,7 @@ class U2BaseModel(nn.Layer): encoder_out = encoder_out.repeat(beam_size, 1, 1) encoder_mask = paddle.ones( - (beam_size, 1, encoder_out.size(1)), dtype=paddle.bool) + (beam_size, 1, encoder_out.shape[1]), dtype=paddle.bool) decoder_out, _ = self.decoder( encoder_out, encoder_mask, hyps_pad, hyps_lens) # (beam_size, max_hyps_len, vocab_size) @@ -690,13 +690,13 @@ class U2BaseModel(nn.Layer): Returns: paddle.Tensor: decoder output, (B, L) """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps + assert encoder_out.shape[0] == 1 + num_hyps = hyps.shape[0] + assert hyps_lens.shape[0] == num_hyps encoder_out = encoder_out.repeat(num_hyps, 1, 1) # (B, 1, T) encoder_mask = paddle.ones( - [num_hyps, 1, encoder_out.size(1)], dtype=paddle.bool) + [num_hyps, 1, encoder_out.shape[1]], dtype=paddle.bool) # (num_hyps, max_hyps_len, vocab_size) decoder_out, _ = self.decoder(encoder_out, encoder_mask, hyps, hyps_lens) @@ -751,7 +751,7 @@ class U2BaseModel(nn.Layer): Returns: List[List[int]]: transcripts. 
""" - batch_size = feats.size(0) + batch_size = feats.shape[0] if decoding_method in ['ctc_prefix_beam_search', 'attention_rescoring'] and batch_size > 1: logger.fatal( @@ -779,7 +779,7 @@ class U2BaseModel(nn.Layer): # result in List[int], change it to List[List[int]] for compatible # with other batch decoding mode elif decoding_method == 'ctc_prefix_beam_search': - assert feats.size(0) == 1 + assert feats.shape[0] == 1 hyp = self.ctc_prefix_beam_search( feats, feats_lengths, @@ -789,7 +789,7 @@ class U2BaseModel(nn.Layer): simulate_streaming=simulate_streaming) hyps = [hyp] elif decoding_method == 'attention_rescoring': - assert feats.size(0) == 1 + assert feats.shape[0] == 1 hyp = self.attention_rescoring( feats, feats_lengths, diff --git a/deepspeech/models/u2_st.py b/deepspeech/models/u2_st.py index 87ca68b29..a3d99942f 100644 --- a/deepspeech/models/u2_st.py +++ b/deepspeech/models/u2_st.py @@ -340,8 +340,8 @@ class U2STBaseModel(nn.Layer): speech, speech_lengths, decoding_chunk_size, num_decoding_left_chunks, simulate_streaming) # (B, maxlen, encoder_dim) - maxlen = encoder_out.size(1) - encoder_dim = encoder_out.size(2) + maxlen = encoder_out.shape[1] + encoder_dim = encoder_out.shape[2] running_size = batch_size * beam_size encoder_out = encoder_out.unsqueeze(1).repeat(1, beam_size, 1, 1).view( running_size, maxlen, encoder_dim) # (B*N, maxlen, encoder_dim) @@ -496,13 +496,13 @@ class U2STBaseModel(nn.Layer): Returns: paddle.Tensor: decoder output, (B, L) """ - assert encoder_out.size(0) == 1 - num_hyps = hyps.size(0) - assert hyps_lens.size(0) == num_hyps + assert encoder_out.shape[0] == 1 + num_hyps = hyps.shape[0] + assert hyps_lens.shape[0] == num_hyps encoder_out = encoder_out.repeat(num_hyps, 1, 1) # (B, 1, T) encoder_mask = paddle.ones( - [num_hyps, 1, encoder_out.size(1)], dtype=paddle.bool) + [num_hyps, 1, encoder_out.shape[1]], dtype=paddle.bool) # (num_hyps, max_hyps_len, vocab_size) decoder_out, _ = self.decoder(encoder_out, encoder_mask, hyps, hyps_lens) @@ -557,7 +557,7 @@ class U2STBaseModel(nn.Layer): Returns: List[List[int]]: transcripts. """ - batch_size = feats.size(0) + batch_size = feats.shape[0] if decoding_method == 'fullsentence': hyps = self.translate( diff --git a/deepspeech/modules/attention.py b/deepspeech/modules/attention.py index 1a984dd45..f94797282 100644 --- a/deepspeech/modules/attention.py +++ b/deepspeech/modules/attention.py @@ -70,7 +70,7 @@ class MultiHeadedAttention(nn.Layer): paddle.Tensor: Transformed value tensor, size (#batch, n_head, time2, d_k). """ - n_batch = query.size(0) + n_batch = query.shape[0] q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) @@ -96,7 +96,7 @@ class MultiHeadedAttention(nn.Layer): paddle.Tensor: Transformed value weighted by the attention score, (#batch, time1, d_model). """ - n_batch = value.size(0) + n_batch = value.shape[0] if mask is not None: mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) scores = scores.masked_fill(mask, -float('inf')) @@ -172,15 +172,16 @@ class RelPositionMultiHeadedAttention(MultiHeadedAttention): paddle.Tensor: Output tensor. 
(batch, head, time1, time1) """ zero_pad = paddle.zeros( - (x.size(0), x.size(1), x.size(2), 1), dtype=x.dtype) + (x.shape[0], x.shape[1], x.shape[2], 1), dtype=x.dtype) x_padded = paddle.cat([zero_pad, x], dim=-1) - x_padded = x_padded.view(x.size(0), x.size(1), x.size(3) + 1, x.size(2)) + x_padded = x_padded.view(x.shape[0], x.shape[1], x.shape[3] + 1, + x.shape[2]) x = x_padded[:, :, 1:].view_as(x) # [B, H, T1, T1] if zero_triu: - ones = paddle.ones((x.size(2), x.size(3))) - x = x * paddle.tril(ones, x.size(3) - x.size(2))[None, None, :, :] + ones = paddle.ones((x.shape[2], x.shape[3])) + x = x * paddle.tril(ones, x.shape[3] - x.shape[2])[None, None, :, :] return x @@ -205,7 +206,7 @@ class RelPositionMultiHeadedAttention(MultiHeadedAttention): q, k, v = self.forward_qkv(query, key, value) q = q.transpose([0, 2, 1, 3]) # (batch, time1, head, d_k) - n_batch_pos = pos_emb.size(0) + n_batch_pos = pos_emb.shape[0] p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) p = p.transpose([0, 2, 1, 3]) # (batch, head, time1, d_k) diff --git a/deepspeech/modules/decoder.py b/deepspeech/modules/decoder.py index 143f6cc57..8ca72894a 100644 --- a/deepspeech/modules/decoder.py +++ b/deepspeech/modules/decoder.py @@ -122,7 +122,7 @@ class TransformerDecoder(nn.Layer): # tgt_mask: (B, 1, L) tgt_mask = (make_non_pad_mask(ys_in_lens).unsqueeze(1)) # m: (1, L, L) - m = subsequent_mask(tgt_mask.size(-1)).unsqueeze(0) + m = subsequent_mask(tgt_mask.shape[-1]).unsqueeze(0) # tgt_mask: (B, L, L) tgt_mask = tgt_mask & m diff --git a/deepspeech/modules/embedding.py b/deepspeech/modules/embedding.py index 98b4e1291..fbbda023c 100644 --- a/deepspeech/modules/embedding.py +++ b/deepspeech/modules/embedding.py @@ -68,7 +68,7 @@ class PositionalEncoding(nn.Layer): paddle.Tensor: for compatibility to RelPositionalEncoding, (batch=1, time, ...) """ T = x.shape[1] - assert offset + x.size(1) < self.max_len + assert offset + x.shape[1] < self.max_len #TODO(Hui Zhang): using T = x.size(1), __getitem__ not support Tensor pos_emb = self.pe[:, offset:offset + T] x = x * self.xscale + pos_emb @@ -114,7 +114,7 @@ class RelPositionalEncoding(PositionalEncoding): paddle.Tensor: Encoded tensor (batch, time, `*`). paddle.Tensor: Positional embedding tensor (1, time, `*`). 
""" - assert offset + x.size(1) < self.max_len + assert offset + x.shape[1] < self.max_len x = x * self.xscale #TODO(Hui Zhang): using x.size(1), __getitem__ not support Tensor pos_emb = self.pe[:, offset:offset + x.shape[1]] diff --git a/deepspeech/modules/encoder.py b/deepspeech/modules/encoder.py index fc1ff3c83..d4a8275c3 100644 --- a/deepspeech/modules/encoder.py +++ b/deepspeech/modules/encoder.py @@ -206,11 +206,11 @@ class BaseEncoder(nn.Layer): chunk computation List[paddle.Tensor]: conformer cnn cache """ - assert xs.size(0) == 1 # batch size must be one + assert xs.shape[0] == 1 # batch size must be one # tmp_masks is just for interface compatibility # TODO(Hui Zhang): stride_slice not support bool tensor # tmp_masks = paddle.ones([1, xs.size(1)], dtype=paddle.bool) - tmp_masks = paddle.ones([1, xs.size(1)], dtype=paddle.int32) + tmp_masks = paddle.ones([1, xs.shape[1]], dtype=paddle.int32) tmp_masks = tmp_masks.unsqueeze(1) #[B=1, C=1, T] if self.global_cmvn is not None: @@ -220,25 +220,25 @@ class BaseEncoder(nn.Layer): xs, tmp_masks, offset=offset) #xs=(B, T, D), pos_emb=(B=1, T, D) if subsampling_cache is not None: - cache_size = subsampling_cache.size(1) #T + cache_size = subsampling_cache.shape[1] #T xs = paddle.cat((subsampling_cache, xs), dim=1) else: cache_size = 0 # only used when using `RelPositionMultiHeadedAttention` pos_emb = self.embed.position_encoding( - offset=offset - cache_size, size=xs.size(1)) + offset=offset - cache_size, size=xs.shape[1]) if required_cache_size < 0: next_cache_start = 0 elif required_cache_size == 0: - next_cache_start = xs.size(1) + next_cache_start = xs.shape[1] else: - next_cache_start = xs.size(1) - required_cache_size + next_cache_start = xs.shape[1] - required_cache_size r_subsampling_cache = xs[:, next_cache_start:, :] # Real mask for transformer/conformer layers - masks = paddle.ones([1, xs.size(1)], dtype=paddle.bool) + masks = paddle.ones([1, xs.shape[1]], dtype=paddle.bool) masks = masks.unsqueeze(1) #[B=1, L'=1, T] r_elayers_output_cache = [] r_conformer_cnn_cache = [] @@ -302,7 +302,7 @@ class BaseEncoder(nn.Layer): stride = subsampling * decoding_chunk_size decoding_window = (decoding_chunk_size - 1) * subsampling + context - num_frames = xs.size(1) + num_frames = xs.shape[1] required_cache_size = decoding_chunk_size * num_decoding_left_chunks subsampling_cache: Optional[paddle.Tensor] = None elayers_output_cache: Optional[List[paddle.Tensor]] = None @@ -318,10 +318,10 @@ class BaseEncoder(nn.Layer): chunk_xs, offset, required_cache_size, subsampling_cache, elayers_output_cache, conformer_cnn_cache) outputs.append(y) - offset += y.size(1) + offset += y.shape[1] ys = paddle.cat(outputs, 1) # fake mask, just for jit script and compatibility with `forward` api - masks = paddle.ones([1, ys.size(1)], dtype=paddle.bool) + masks = paddle.ones([1, ys.shape[1]], dtype=paddle.bool) masks = masks.unsqueeze(1) return ys, masks diff --git a/deepspeech/utils/ctc_utils.py b/deepspeech/utils/ctc_utils.py index 09543d48d..2639f3064 100644 --- a/deepspeech/utils/ctc_utils.py +++ b/deepspeech/utils/ctc_utils.py @@ -84,11 +84,11 @@ def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor, y_insert_blank = insert_blank(y, blank_id) #(2L+1) log_alpha = paddle.zeros( - (ctc_probs.size(0), len(y_insert_blank))) #(T, 2L+1) + (ctc_probs.shape[0], len(y_insert_blank))) #(T, 2L+1) log_alpha = log_alpha - float('inf') # log of zero # TODO(Hui Zhang): zeros not support paddle.int16 state_path = (paddle.zeros( - (ctc_probs.size(0), 
len(y_insert_blank)), dtype=paddle.int32) - 1 + (ctc_probs.shape[0], len(y_insert_blank)), dtype=paddle.int32) - 1 ) # state path, Tuple((T, 2L+1)) # init start state @@ -96,7 +96,7 @@ def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor, log_alpha[0, 0] = ctc_probs[0][int(y_insert_blank[0])] # State-b, Sb log_alpha[0, 1] = ctc_probs[0][int(y_insert_blank[1])] # State-nb, Snb - for t in range(1, ctc_probs.size(0)): # T + for t in range(1, ctc_probs.shape[0]): # T for s in range(len(y_insert_blank)): # 2L+1 if y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[ s] == y_insert_blank[s - 2]: @@ -116,7 +116,7 @@ def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor, state_path[t, s] = prev_state[paddle.argmax(candidates)] # TODO(Hui Zhang): zeros not support paddle.int16 - state_seq = -1 * paddle.ones((ctc_probs.size(0), 1), dtype=paddle.int32) + state_seq = -1 * paddle.ones((ctc_probs.shape[0], 1), dtype=paddle.int32) candidates = paddle.to_tensor([ log_alpha[-1, len(y_insert_blank) - 1], # Sb @@ -124,11 +124,11 @@ def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor, ]) prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2] state_seq[-1] = prev_state[paddle.argmax(candidates)] - for t in range(ctc_probs.size(0) - 2, -1, -1): + for t in range(ctc_probs.shape[0] - 2, -1, -1): state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] output_alignment = [] - for t in range(0, ctc_probs.size(0)): + for t in range(0, ctc_probs.shape[0]): output_alignment.append(y_insert_blank[state_seq[t, 0]]) return output_alignment diff --git a/deepspeech/utils/tensor_utils.py b/deepspeech/utils/tensor_utils.py index 3519f4fa5..bb7f58ded 100644 --- a/deepspeech/utils/tensor_utils.py +++ b/deepspeech/utils/tensor_utils.py @@ -83,7 +83,7 @@ def pad_sequence(sequences: List[paddle.Tensor], # (TODO Hui Zhang): slice not supprot `end==start` # trailing_dims = max_size[1:] trailing_dims = max_size[1:] if max_size.ndim >= 2 else () - max_len = max([s.size(0) for s in sequences]) + max_len = max([s.shape[0] for s in sequences]) if batch_first: out_dims = (len(sequences), max_len) + trailing_dims else: @@ -91,7 +91,7 @@ def pad_sequence(sequences: List[paddle.Tensor], out_tensor = sequences[0].new_full(out_dims, padding_value) for i, tensor in enumerate(sequences): - length = tensor.size(0) + length = tensor.shape[0] # use index notation to prevent duplicate references to the tensor if batch_first: out_tensor[i, :length, ...] = tensor @@ -139,7 +139,7 @@ def add_sos_eos(ys_pad: paddle.Tensor, sos: int, eos: int, #ys_in = [paddle.cat([_sos, y], dim=0) for y in ys] #ys_out = [paddle.cat([y, _eos], dim=0) for y in ys] #return pad_sequence(ys_in, padding_value=eos), pad_sequence(ys_out, padding_value=ignore_id) - B = ys_pad.size(0) + B = ys_pad.shape[0] _sos = paddle.ones([B, 1], dtype=ys_pad.dtype) * sos _eos = paddle.ones([B, 1], dtype=ys_pad.dtype) * eos ys_in = paddle.cat([_sos, ys_pad], dim=1) @@ -165,8 +165,8 @@ def th_accuracy(pad_outputs: paddle.Tensor, Returns: float: Accuracy value (0.0 - 1.0). 
""" - pad_pred = pad_outputs.view( - pad_targets.size(0), pad_targets.size(1), pad_outputs.size(1)).argmax(2) + pad_pred = pad_outputs.view(pad_targets.shape[0], pad_targets.shape[1], + pad_outputs.shape[1]).argmax(2) mask = pad_targets != ignore_label numerator = paddle.sum( pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) From 15d26cc4addb62918a4ae5bc8c381350d811058d Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Sat, 18 Sep 2021 02:56:28 +0000 Subject: [PATCH 14/14] update u2 transformer config --- examples/librispeech/s1/conf/augmentation.json | 8 ++++---- examples/librispeech/s1/conf/transformer.yaml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/librispeech/s1/conf/augmentation.json b/examples/librispeech/s1/conf/augmentation.json index 8e6e97040..40a5b7900 100644 --- a/examples/librispeech/s1/conf/augmentation.json +++ b/examples/librispeech/s1/conf/augmentation.json @@ -19,17 +19,17 @@ { "type": "specaug", "params": { + "W": 0, + "warp_mode": "PIL", "F": 10, - "T": 50, "n_freq_masks": 2, + "T": 50, "n_time_masks": 2, "p": 1.0, - "W": 80, "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": true, - "warp_mode": "PIL" + "replace_with_zero": true }, "prob": 1.0 } diff --git a/examples/librispeech/s1/conf/transformer.yaml b/examples/librispeech/s1/conf/transformer.yaml index 4aa7b9158..fe9cab069 100644 --- a/examples/librispeech/s1/conf/transformer.yaml +++ b/examples/librispeech/s1/conf/transformer.yaml @@ -33,7 +33,7 @@ collator: keep_transcription_text: False sortagrad: True shuffle_method: batch_shuffle - num_workers: 0 + num_workers: 2 # network architecture @@ -74,7 +74,7 @@ model: training: - n_epoch: 120 + n_epoch: 120 accum_grad: 2 global_grad_clip: 5.0 optim: adam