parent
fcd91c62d0
commit
b31a1f46d9
@ -0,0 +1,56 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import logging
from typing import Union

import paddle
from paddle.optimizer.lr import LRScheduler
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = ["WarmupLR"]
|
||||
|
||||
|
||||
class WarmupLR(LRScheduler):
    """The WarmupLR scheduler

    This scheduler is almost same as NoamLR Scheduler except for following
    difference:

    NoamLR:
        lr = optimizer.lr * model_size ** -0.5
             * min(step ** -0.5, step * warmup_step ** -1.5)
    WarmupLR:
        lr = optimizer.lr * warmup_step ** 0.5
             * min(step ** -0.5, step * warmup_step ** -1.5)

    Note that the maximum lr equals to optimizer.lr in this scheduler.
    """

    def __init__(self,
                 warmup_steps: Union[int, float]=25000,
                 learning_rate=1.0,
                 last_epoch=-1,
                 verbose=False):
        """Create the scheduler.

        Args:
            warmup_steps (Union[int, float]): step at which the learning rate
                reaches its maximum. Defaults to 25000.
            learning_rate (float): peak learning rate, forwarded to
                ``LRScheduler``. Defaults to 1.0.
            last_epoch (int): forwarded to ``LRScheduler``. Defaults to -1.
            verbose (bool): forwarded to ``LRScheduler``. Defaults to False.

        Raises:
            TypeError: if ``warmup_steps`` is neither an int nor a float.
        """
        # Explicit check instead of typeguard's `check_argument_types()`,
        # which this module never imported (it raised NameError here).
        if not isinstance(warmup_steps, (int, float)):
            raise TypeError(
                f"warmup_steps must be int or float, got "
                f"{type(warmup_steps).__name__}")
        # Must be set before the base initializer runs: get_lr() reads it.
        self.warmup_steps = warmup_steps
        super().__init__(learning_rate, last_epoch, verbose)

    def __repr__(self):
        return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps})"

    def get_lr(self):
        """Return the learning rate for the current step.

        The step number is ``last_epoch + 1``. The rate grows linearly
        (``base_lr * step / warmup_steps``) while ``step < warmup_steps`` and
        decays as ``base_lr * sqrt(warmup_steps / step)`` afterwards.
        """
        step_num = self.last_epoch + 1
        return self.base_lr * self.warmup_steps**0.5 * min(
            step_num**-0.5, step_num * self.warmup_steps**-1.5)

    def set_step(self, step: int):
        """Overwrite the internal step counter (``last_epoch``) with ``step``."""
        self.last_epoch = step
|
@ -0,0 +1,93 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import json
|
||||
import math
|
||||
import logging
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = ['load_cmvn']
|
||||
|
||||
|
||||
def _load_json_cmvn(json_cmvn_file):
    """Read accumulated CMVN statistics from a JSON file and normalize them.

    The file must provide ``mean_stat`` (per-dimension sums), ``var_stat``
    (per-dimension sums of squares) and ``frame_num`` (frame count).

    Args:
        json_cmvn_file: cmvn stats file in json format
    Returns:
        a numpy array of [means, vars], where the second row holds
        ``1 / sqrt(variance)`` with the variance floored at 1.0e-20
    """
    with open(json_cmvn_file) as stats_fp:
        stats = json.load(stats_fp)

    frame_count = stats['frame_num']
    sums = stats['mean_stat']
    square_sums = stats['var_stat']

    # Both lists are rewritten in place, dimension by dimension.
    for dim, total in enumerate(sums):
        mean = total / frame_count
        sums[dim] = mean
        # E[x^2] - E[x]^2, floored so the reciprocal square root stays finite.
        var = max(square_sums[dim] / frame_count - mean * mean, 1.0e-20)
        square_sums[dim] = 1.0 / math.sqrt(var)

    return np.array([sums, square_sums])
|
||||
|
||||
|
||||
def _load_kaldi_cmvn(kaldi_cmvn_file):
    """ Load the kaldi format cmvn stats file and calculate cmvn

    The text file is expected to look like
    ``[ sum_1 .. sum_D count  sqsum_1 .. sqsum_D 0 ]``.

    Args:
        kaldi_cmvn_file: kaldi text style global cmvn file, which
            is generated by:
            compute-cmvn-stats --binary=false scp:feats.scp global_cmvn
    Returns:
        a numpy array of [means, vars], where the second row holds
        ``1 / sqrt(variance)`` with the variance floored at 1.0e-20
    Raises:
        SystemExit: if the file is a kaldi binary file.
        AssertionError: if the token layout is not the expected one.
    """
    # Local import: `sys` is not imported at module level in this file.
    import sys

    means = []
    variance = []
    # Read raw bytes so the binary header can be checked before any text
    # decoding happens; decoding a binary stats file may fail outright.
    with open(kaldi_cmvn_file, 'rb') as fid:
        raw = fid.read()

    # kaldi binary file start with '\0B'
    if raw[:2] == b'\0B':
        logger.error('kaldi cmvn binary file is not supported, please '
                     'recompute it by: compute-cmvn-stats --binary=false '
                     ' scp:feats.scp global_cmvn')
        sys.exit(1)

    arr = raw.decode('utf-8').split()
    assert (arr[0] == '['), "kaldi cmvn stats must start with '['"
    assert (arr[-2] == '0'), "kaldi cmvn stats must end with '0 ]'"
    assert (arr[-1] == ']'), "kaldi cmvn stats must end with ']'"
    # Tokens are: '[', D sums, count, D square sums, '0', ']'.
    feat_dim = (len(arr) - 2 - 2) // 2
    for i in range(1, feat_dim + 1):
        means.append(float(arr[i]))
    count = float(arr[feat_dim + 1])
    for i in range(feat_dim + 2, 2 * feat_dim + 2):
        variance.append(float(arr[i]))

    for i in range(len(means)):
        means[i] /= count
        # E[x^2] - E[x]^2, floored so the reciprocal square root stays finite.
        variance[i] = variance[i] / count - means[i] * means[i]
        if variance[i] < 1.0e-20:
            variance[i] = 1.0e-20
        variance[i] = 1.0 / math.sqrt(variance[i])
    cmvn = np.array([means, variance])
    return cmvn
|
||||
|
||||
|
||||
def load_cmvn(cmvn_file, is_json):
    """Load CMVN statistics from a JSON or kaldi text file.

    Args:
        cmvn_file: path of the cmvn stats file
        is_json: truthy to parse the file as JSON, otherwise as kaldi text
    Returns:
        a tuple made of the first and second rows of the loaded array
    """
    loader = _load_json_cmvn if is_json else _load_kaldi_cmvn
    stats = loader(cmvn_file)
    return stats[0], stats[1]
|
@ -0,0 +1,128 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import logging
|
||||
import numpy as np
|
||||
from typing import List
|
||||
|
||||
import paddle
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = ["forced_align", "remove_duplicates_and_blank", "insert_blank"]
|
||||
|
||||
|
||||
def remove_duplicates_and_blank(hyp: List[int], blank_id=0) -> List[int]:
    """Collapse a ctc alignment into ctc label ids.

    "abaa-acee-" -> "abaace"

    Args:
        hyp (List[int]): hypotheses ids, (L)
        blank_id (int, optional): blank id. Defaults to 0.

    Returns:
        List[int]: ids with consecutive duplicates merged, then blank ids
            removed.
    """
    collapsed: List[int] = []
    for pos, token in enumerate(hyp):
        # Only the first element of each run of equal ids is a candidate.
        starts_run = pos == 0 or token != hyp[pos - 1]
        if starts_run and token != blank_id:
            collapsed.append(token)
    return collapsed
|
||||
|
||||
|
||||
def insert_blank(label: np.ndarray, blank_id: int=0):
    """Insert blank token between every two label token.

    "abcdefg" -> "-a-b-c-d-e-f-g-"

    An empty label yields a single blank.

    Args:
        label ([np.ndarray]): label ids, (L).
        blank_id (int, optional): blank id. Defaults to 0.

    Returns:
        [np.ndarray]: (2L+1).
    """
    label = np.asarray(label)
    # Same dtype as concatenating int64 blanks with the labels would give.
    out_dtype = np.result_type(np.int64, label.dtype)
    # Start from all blanks, then drop the labels into the odd positions.
    with_blanks = np.full(2 * label.shape[0] + 1, blank_id, dtype=out_dtype)
    with_blanks[1::2] = label
    return with_blanks
|
||||
|
||||
|
||||
def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor,
                 blank_id=0) -> list:
    """ctc forced alignment.

    Finds the best-scoring path through the blank-extended label sequence
    and returns the symbol emitted at every frame.

    https://distill.pub/2017/ctc/

    Args:
        ctc_probs (paddle.Tensor): hidden state sequence, 2d tensor (T, D).
            Scores are accumulated by addition, so they are presumably
            log-probabilities -- TODO confirm against the caller.
        y (paddle.Tensor): label id sequence tensor, 1d tensor (L)
        blank_id (int): blank symbol index
    Returns:
        list: best alignment result, one entry of the blank-extended label
            sequence per frame (length T).
    """
    # NOTE(review): relies on a torch-style `Tensor.size(dim)` call; confirm
    # that paddle.Tensor provides it in this project.
    y_insert_blank = insert_blank(y, blank_id)

    log_alpha = paddle.zeros(
        (ctc_probs.size(0), len(y_insert_blank)))  #(T, 2L+1)
    log_alpha = log_alpha - float('inf')  # log of zero
    # Back-pointers: state_path[t, s] is the state occupied at frame t - 1.
    state_path = (paddle.zeros(
        (ctc_probs.size(0), len(y_insert_blank)), dtype=paddle.int16) - 1
                  )  # state path

    # init start state
    log_alpha[0, 0] = ctc_probs[0][y_insert_blank[0]]  # Sb
    log_alpha[0, 1] = ctc_probs[0][y_insert_blank[1]]  # Snb

    for t in range(1, ctc_probs.size(0)):
        for s in range(len(y_insert_blank)):
            if s == 0:
                # The first state has no predecessor other than itself.
                # Reading `s - 1` here would index -1 and wrongly offer the
                # last state as a predecessor.
                candidates = paddle.to_tensor([log_alpha[t - 1, s]])
                prev_state = [s]
            elif y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[
                    s] == y_insert_blank[s - 2]:
                # Blank, or same label as two states back: no skip allowed.
                candidates = paddle.to_tensor(
                    [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]])
                prev_state = [s, s - 1]
            else:
                # A different label may also skip the blank in between.
                candidates = paddle.to_tensor([
                    log_alpha[t - 1, s],
                    log_alpha[t - 1, s - 1],
                    log_alpha[t - 1, s - 2],
                ])
                prev_state = [s, s - 1, s - 2]
            log_alpha[t, s] = paddle.max(candidates) + ctc_probs[t][
                y_insert_blank[s]]
            state_path[t, s] = prev_state[paddle.argmax(candidates)]

    state_seq = -1 * paddle.ones((ctc_probs.size(0), 1), dtype=paddle.int16)

    # The path has to end in the final blank or the final label.
    candidates = paddle.to_tensor([
        log_alpha[-1, len(y_insert_blank) - 1],  # Sb
        log_alpha[-1, len(y_insert_blank) - 2]  # Snb
    ])
    prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2]
    state_seq[-1] = prev_state[paddle.argmax(candidates)]
    # Walk the back-pointers from the last frame to the first.
    for t in range(ctc_probs.size(0) - 2, -1, -1):
        state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]]

    output_alignment = []
    for t in range(0, ctc_probs.size(0)):
        output_alignment.append(y_insert_blank[state_seq[t, 0]])

    return output_alignment
|
@ -1,43 +0,0 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import math
|
||||
import logging
|
||||
from typing import Tuple, List
|
||||
|
||||
import paddle
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = ["th_accuracy"]
|
||||
|
||||
|
||||
def th_accuracy(pad_outputs: paddle.Tensor,
                pad_targets: paddle.Tensor,
                ignore_label: int) -> float:
    """Compute the prediction accuracy over non-ignored target positions.

    Args:
        pad_outputs (Tensor): Prediction tensors (B * Lmax, D).
        pad_targets (LongTensor): Target label tensors; the code reads its
            first two dimensions as (B, Lmax).
        ignore_label (int): Ignore label id.
    Returns:
        float: Accuracy value (0.0 - 1.0).
    """
    batch_size = pad_targets.size(0)
    max_len = pad_targets.size(1)
    num_classes = pad_outputs.size(1)
    # Restore the batch layout, then take the best class per position.
    predictions = pad_outputs.view(batch_size, max_len, num_classes).argmax(2)

    keep = pad_targets != ignore_label
    hits = predictions.masked_select(keep) == pad_targets.masked_select(keep)
    correct = paddle.sum(hits)
    total = paddle.sum(keep)
    return float(correct) / float(total)
|
Loading…
Reference in new issue