From 65346b83ee440113803be5a6b9d546a3e3528028 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Fri, 21 May 2021 09:43:59 +0000
Subject: [PATCH] remove sequence_mask

---
 deepspeech/__init__.py     |  1 +
 deepspeech/modules/conv.py |  6 ++++--
 deepspeech/modules/mask.py | 34 +++------------------------------
 deepspeech/modules/rnn.py  |  6 ++++--
 tests/mask_test.py         |  7 -------
 5 files changed, 12 insertions(+), 42 deletions(-)

diff --git a/deepspeech/__init__.py b/deepspeech/__init__.py
index c942de0cf..ac9ccdc77 100644
--- a/deepspeech/__init__.py
+++ b/deepspeech/__init__.py
@@ -421,6 +421,7 @@ logger.warn(
 )
 F.ctc_loss = ctc_loss
 
+########### hack paddle.nn #############
 if not hasattr(paddle.nn, 'Module'):
     logger.warn(
         "register user Module to paddle.nn, remove this when fixed!")
diff --git a/deepspeech/modules/conv.py b/deepspeech/modules/conv.py
index f0f0d7463..111f5d3b4 100644
--- a/deepspeech/modules/conv.py
+++ b/deepspeech/modules/conv.py
@@ -15,7 +15,7 @@ from paddle import nn
 from paddle.nn import functional as F
 
 from deepspeech.modules.activation import brelu
-from deepspeech.modules.mask import sequence_mask
+from deepspeech.modules.mask import make_non_pad_mask
 from deepspeech.utils.log import Log
 
 logger = Log(__name__).getlog()
@@ -111,8 +111,10 @@ class ConvBn(nn.Layer):
         ) // self.stride[1] + 1
 
         # reset padding part to 0
-        masks = sequence_mask(x_len)  #[B, T]
+        masks = make_non_pad_mask(x_len)  #[B, T]
         masks = masks.unsqueeze(1).unsqueeze(1)  # [B, 1, 1, T]
+        # TODO(Hui Zhang): multiply does not support bool inputs
+        masks = masks.type_as(x)
         x = x.multiply(masks)
 
         return x, x_len
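
The conv.py hunk keeps the usual broadcast-masking pattern: the [B, T] mask is unsqueezed to [B, 1, 1, T] so the multiply broadcasts across the channel and frequency axes of the [B, C, D, T] feature map. A minimal sketch of the shape arithmetic (dimension sizes are illustrative, not taken from this repo):

    import paddle

    B, C, D, T = 2, 32, 41, 4          # batch, channels, freq bins, time steps
    x = paddle.randn([B, C, D, T])
    masks = paddle.to_tensor([[1., 1., 0., 0.],
                              [1., 1., 1., 1.]])  # [B, T], 1. on real frames
    masks = masks.unsqueeze(1).unsqueeze(1)       # [B, 1, 1, T]
    x = x.multiply(masks)                         # broadcasts over C and D
    print(x.shape)                                # [2, 32, 41, 4]
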
diff --git a/deepspeech/modules/mask.py b/deepspeech/modules/mask.py
index 65a8ba316..c506f127b 100644
--- a/deepspeech/modules/mask.py
+++ b/deepspeech/modules/mask.py
@@ -18,40 +18,11 @@ from deepspeech.utils.log import Log
 logger = Log(__name__).getlog()
 
 __all__ = [
-    'sequence_mask', "make_pad_mask", "make_non_pad_mask", "subsequent_mask",
+    "make_pad_mask", "make_non_pad_mask", "subsequent_mask",
     "subsequent_chunk_mask", "add_optional_chunk_mask", "mask_finished_scores",
     "mask_finished_preds"
 ]
 
-
-def sequence_mask(x_len, max_len=None, dtype='float32'):
-    """batch sequence mask.
-
-    Args:
-        x_len ([paddle.Tensor]): xs length, [B]
-        max_len ([type], optional): max sequence length. Defaults to None.
-        dtype (str, optional): mask data type. Defaults to 'float32'.
-
-    Returns:
-        paddle.Tensor: [B, Tmax]
-
-    Examples:
-        >>> sequence_mask([2, 4])
-        [[1., 1., 0., 0.],
-         [1., 1., 1., 1.]]
-    """
-    # TODO(Hui Zhang): jit does not support Tensor.dim() and Tensor.ndim
-    # assert x_len.dim() == 1, (x_len.dim(), x_len)
-    max_len = max_len or x_len.max()
-    x_len = paddle.unsqueeze(x_len, -1)
-    row_vector = paddle.arange(max_len)
-    # TODO(Hui Zhang): fix this bug
-    #mask = row_vector < x_len
-    mask = row_vector > x_len  # a bug: it breaks when broadcasting
-    mask = paddle.cast(mask, dtype)
-    return mask
-
-
 def make_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
     """Make mask tensor containing indices of padded part.
     See description of make_non_pad_mask.
@@ -66,7 +37,8 @@ def make_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
         [0, 0, 0, 1, 1],
         [0, 0, 1, 1, 1]]
     """
-    assert lengths.dim() == 1
+    # TODO(Hui Zhang): jit does not support Tensor.dim() and Tensor.ndim
+    # assert lengths.dim() == 1
     batch_size = int(lengths.shape[0])
     max_len = int(lengths.max())
     seq_range = paddle.arange(0, max_len, dtype=paddle.int64)
diff --git a/deepspeech/modules/rnn.py b/deepspeech/modules/rnn.py
index cef731e35..29bd28839 100644
--- a/deepspeech/modules/rnn.py
+++ b/deepspeech/modules/rnn.py
@@ -19,7 +19,7 @@ from paddle.nn import functional as F
 from paddle.nn import initializer as I
 
 from deepspeech.modules.activation import brelu
-from deepspeech.modules.mask import sequence_mask
+from deepspeech.modules.mask import make_non_pad_mask
 from deepspeech.utils.log import Log
 
 logger = Log(__name__).getlog()
@@ -306,7 +306,9 @@ class RNNStack(nn.Layer):
         """
         for i, rnn in enumerate(self.rnn_stacks):
             x, x_len = rnn(x, x_len)
-            masks = sequence_mask(x_len)  #[B, T]
+            masks = make_non_pad_mask(x_len)  #[B, T]
             masks = masks.unsqueeze(-1)  # [B, T, 1]
+            # TODO(Hui Zhang): multiply does not support bool inputs
+            masks = masks.type_as(x)
             x = x.multiply(masks)
         return x, x_len
diff --git a/tests/mask_test.py b/tests/mask_test.py
index cd37a899a..f44aca8fc 100644
--- a/tests/mask_test.py
+++ b/tests/mask_test.py
@@ -18,7 +18,6 @@ import paddle
 
 from deepspeech.modules.mask import make_non_pad_mask
 from deepspeech.modules.mask import make_pad_mask
-from deepspeech.modules.mask import sequence_mask
 
 
 class TestU2Model(unittest.TestCase):
@@ -36,16 +35,10 @@ class TestU2Model(unittest.TestCase):
             [False, False, True, True, True],
         ])
 
-    def test_sequence_mask(self):
-        res = sequence_mask(self.lengths, dtype='bool')
-        self.assertSequenceEqual(res.numpy().tolist(), self.masks.tolist())
-
     def test_make_non_pad_mask(self):
         res = make_non_pad_mask(self.lengths)
-        res1 = sequence_mask(self.lengths, dtype='bool')
         res2 = make_pad_mask(self.lengths).logical_not()
         self.assertSequenceEqual(res.numpy().tolist(), self.masks.tolist())
-        self.assertSequenceEqual(res.numpy().tolist(), res1.numpy().tolist())
         self.assertSequenceEqual(res.numpy().tolist(), res2.numpy().tolist())
 
     def test_make_pad_mask(self):
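
For reference, the replacement masking path can be exercised on its own. A minimal sketch, assuming paddle is installed and this package is on the path; the astype cast stands in for the type_as workaround used in the patch (type_as appears to be a torch-style helper registered on paddle.Tensor elsewhere in deepspeech/__init__.py, so astype is the safe, stock-paddle equivalent):

    import paddle

    from deepspeech.modules.mask import make_non_pad_mask

    lengths = paddle.to_tensor([2, 4], dtype='int64')
    x = paddle.randn([2, 4])

    masks = make_non_pad_mask(lengths)  # bool [B, Tmax], True on real frames:
    # [[True, True, False, False],
    #  [True, True, True, True]]

    # multiply rejects bool operands, hence the cast before masking
    x = x.multiply(masks.astype(x.dtype))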