From 1fbb41801b149045fc468bccd12961c54b3b0752 Mon Sep 17 00:00:00 2001
From: yzz
Date: Mon, 9 Jun 2025 17:39:58 +0800
Subject: [PATCH] Feat: add npu support for 'vector'

---
 paddlespeech/vector/models/ecapa_tdnn.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/paddlespeech/vector/models/ecapa_tdnn.py b/paddlespeech/vector/models/ecapa_tdnn.py
index 895ff13f4..66518f3a3 100644
--- a/paddlespeech/vector/models/ecapa_tdnn.py
+++ b/paddlespeech/vector/models/ecapa_tdnn.py
@@ -66,7 +66,12 @@ class Conv1d(nn.Layer):
         self.stride = stride
         self.dilation = dilation
         self.padding = padding
-        self.padding_mode = padding_mode
+
+        # Force padding_mode to 'constant' on the npu device, since npu only supports mode='constant' right now.
+        if paddle.get_device().startswith('npu'):
+            self.padding_mode = 'constant'
+        else:
+            self.padding_mode = padding_mode
 
         self.conv = nn.Conv1D(
             in_channels,
@@ -335,10 +340,16 @@ class AttentiveStatisticsPooling(nn.Layer):
 
         # Apply layers
         attn = self.conv(self.tanh(self.tdnn(attn)))
 
+        if paddle.get_device().startswith('npu'):
+            # Build the -inf tensor with paddle.full_like to fix the 'Broadcast dimension mismatch'
+            # error that occurs on the npu device when padding_mode is set to 'constant'.
+            inf_tensor = paddle.full_like(attn, float("-inf"))
+        else:
+            # the default way on other devices
+            inf_tensor = paddle.ones_like(attn) * float("-inf")
+
         # Filter out zero-paddings
-        attn = paddle.where(
-            mask.tile((1, C, 1)) == 0,
-            paddle.ones_like(attn) * float("-inf"), attn)
+        attn = paddle.where(mask.tile((1, C, 1)) == 0, inf_tensor, attn)
         attn = F.softmax(attn, axis=2)
 
         mean, std = _compute_statistics(x, attn)
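
Reviewer note: the snippet below is a standalone sketch, not part of the patch. It mirrors the new masked-softmax path in AttentiveStatisticsPooling on made-up shapes and values (batch, channels, time), and it runs on CPU as well.

    import paddle
    import paddle.nn.functional as F

    # Hypothetical attention logits and padding mask, shaped (batch, channels, time).
    attn = paddle.rand([2, 3, 5])
    mask = paddle.to_tensor(
        [[[1, 1, 1, 0, 0]],   # first sample: last two frames are padding
         [[1, 1, 1, 1, 1]]],  # second sample: no padding
        dtype='float32')

    # npu-safe construction: paddle.full_like fills -inf directly, avoiding the
    # broadcasted multiply in paddle.ones_like(attn) * float("-inf") that the
    # patch reports as failing on the npu device.
    inf_tensor = paddle.full_like(attn, float("-inf"))

    # Padded frames get -inf so softmax assigns them exactly zero weight.
    attn = paddle.where(mask.tile((1, 3, 1)) == 0, inf_tensor, attn)
    attn = F.softmax(attn, axis=2)

    print(attn.sum(axis=2))   # every row sums to 1
    print(attn[0, :, 3:])     # padded positions carry zero attention

Since paddle.full_like(attn, float("-inf")) and paddle.ones_like(attn) * float("-inf") produce identical values, the device branch only changes how the -inf tensor is built, not the pooling result.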