【Hackathon 7th】fix whisper at Paddle 3.0 (#3880)

* fix whisper at Paddle 3.0

* fix whisper at Paddle 3.0

* fix whisper at Paddle 3.0

* fix lint

* fix

* fix whisper ci

* Update TTSCppFrontend

* Update utils

* Update steps

* Update utils

* Update __init__.py

* Update whisper.py

* Update utils

* Update utils
yinfan98 committed via GitHub · commit 5b3612f273 · parent 4015676a42

@@ -1 +1 @@
-../../TTSCppFrontend/
+../../TTSCppFrontend/

@@ -1 +1 @@
-../../../utils/
+../../../utils/

@@ -1 +1 @@
-../../../tools/kaldi/egs/wsj/s5/steps/
+../../../tools/kaldi/egs/wsj/s5/steps/

@@ -1 +1 @@
-../../../utils/
+../../../utils/

@@ -109,11 +109,11 @@ class MultiHeadAttention(nn.Layer):
         n_batch, n_ctx, n_state = q.shape
         scale = (n_state // self.n_head)**-0.25
         q = paddle.transpose(
-            q.view(*q.shape[:2], self.n_head, -1), (0, 2, 1, 3)) * scale
+            q.reshape([*q.shape[:2], self.n_head, -1]), (0, 2, 1, 3)) * scale
         k = paddle.transpose(
-            k.view(*k.shape[:2], self.n_head, -1), (0, 2, 3, 1)) * scale
+            k.reshape([*k.shape[:2], self.n_head, -1]), (0, 2, 3, 1)) * scale
         v = paddle.transpose(
-            v.view(*v.shape[:2], self.n_head, -1), (0, 2, 1, 3))
+            v.reshape([*v.shape[:2], self.n_head, -1]), (0, 2, 1, 3))
         qk = q @ k
         if mask is not None:
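
The hunk above is the core of the Paddle 3.0 migration: the torch-style `Tensor.view(*shape)` call with variadic dimensions becomes `Tensor.reshape([...])`, which takes the target shape as a single list. A minimal sketch of the migrated pattern, using made-up sizes (batch 2, context 10, state 16, 4 heads) rather than anything from whisper.py:

import paddle

n_head = 4
q = paddle.randn([2, 10, 16])  # hypothetical (batch, ctx, state) activations
scale = (q.shape[-1] // n_head)**-0.25
# Paddle's reshape takes the full target shape as one list argument.
q_heads = paddle.transpose(
    q.reshape([*q.shape[:2], n_head, -1]), (0, 2, 1, 3)) * scale
print(q_heads.shape)  # [2, 4, 10, 4]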
@@ -823,7 +823,7 @@ class BeamSearchDecoder(TokenDecoder):
         if self.finished_sequences is None:  # for the first update
             self.finished_sequences = [{} for _ in range(batch_size)]
-        logprobs = F.log_softmax(logits, axis=-1, dtype=paddle.float32)
+        logprobs = F.log_softmax(logits, axis=-1, dtype='float32')
         next_tokens, source_indices, finished_sequences = [], [], []
         for i in range(batch_size):
             scores, sources, finished = {}, {}, {}
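
This `dtype` change (repeated in the next hunk) swaps the `paddle.float32` dtype object for the string 'float32'. A minimal sketch of the updated call, with a made-up logits tensor standing in for the beam-search scores:

import paddle
import paddle.nn.functional as F

logits = paddle.randn([3, 51865])  # hypothetical (beams, vocab) scores
# The string form of dtype casts the log-probs to float32 on the way out.
logprobs = F.log_softmax(logits, axis=-1, dtype='float32')
print(logprobs.dtype)  # paddle.float32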
@@ -969,7 +969,7 @@ class ApplyTimestampRules(LogitFilter):
         logits[:, last_allowed + 1:] = -np.inf
         # if sum of probability over timestamps is above any other token, sample timestamp
-        logprobs = F.log_softmax(logits, axis=-1, dtype=paddle.float32)
+        logprobs = F.log_softmax(logits, axis=-1, dtype='float32')
         for k in range(tokens.shape[0]):
             # When using paddle.logsumexp on a 32GB Tesla-V100 GPU, we encountered CUDA error 700.
             # To bypass this issue in CI, we have decomposed the operation into separate steps.
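
The trailing comments refer to replacing a single `paddle.logsumexp` call with elementary ops. One standard decomposition uses the max-shift identity logsumexp(x) = max(x) + log(sum(exp(x - max(x)))); the sketch below illustrates that technique and is not necessarily the exact sequence of steps used in whisper.py:

import paddle

def logsumexp_decomposed(x, axis=-1):
    # Subtract the per-row max before exponentiating so exp() cannot
    # overflow, then add it back outside the log.
    m = paddle.max(x, axis=axis, keepdim=True)
    return paddle.squeeze(
        m + paddle.log(paddle.exp(x - m).sum(axis=axis, keepdim=True)),
        axis=axis)

x = paddle.randn([4, 100])
# Should print a bool Tensor holding True.
print(paddle.allclose(logsumexp_decomposed(x), paddle.logsumexp(x, axis=-1)))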

@@ -1 +1 @@
-../../../utils/
+../../../utils/

@@ -1 +1 @@
-../../../../utils/
+../../../../utils/
