【Hackathon 7th】fix whisper at Paddle 3.0 (#3880)

* fix whisper at Paddle 3.0
* fix whisper at Paddle 3.0
* fix whisper at Paddle 3.0
* fix lint
* fix
* fix whisper ci
* Update TTSCppFrontend
* Update utils
* Update steps
* Update utils
* Update __init__.py
* Update whisper.py
* Update utils
* Update utils
10 months ago · 5b3612f273
parent 4015676a42
commit 5b3612f273
7 changed files with 11 additions and 11 deletions
--- a/demos/TTSArmLinux/src/TTSCppFrontend
+++ b/demos/TTSArmLinux/src/TTSCppFrontend
@@ -1 +1 @@
-../../TTSCppFrontend/
+../../TTSCppFrontend/
--- a/examples/aishell/asr0/utils
+++ b/examples/aishell/asr0/utils
@@ -1 +1 @@
-../../../utils/
+../../../utils/
--- a/examples/librispeech/asr2/steps
+++ b/examples/librispeech/asr2/steps
@@ -1 +1 @@
-../../../tools/kaldi/egs/wsj/s5/steps/
+../../../tools/kaldi/egs/wsj/s5/steps/
--- a/examples/voxceleb/sv0/utils
+++ b/examples/voxceleb/sv0/utils
@@ -1 +1 @@
-../../../utils/
+../../../utils/
--- a/paddlespeech/s2t/models/whisper/whisper.py
+++ b/paddlespeech/s2t/models/whisper/whisper.py
@@ -109,11 +109,11 @@ class MultiHeadAttention(nn.Layer):
        n_batch, n_ctx, n_state = q.shape
        scale = (n_state // self.n_head)**-0.25
        q = paddle.transpose(
-            q.view(*q.shape[:2], self.n_head, -1), (0, 2, 1, 3)) * scale
+            q.reshape([*q.shape[:2], self.n_head, -1]), (0, 2, 1, 3)) * scale
        k = paddle.transpose(
-            k.view(*k.shape[:2], self.n_head, -1), (0, 2, 3, 1)) * scale
+            k.reshape([*k.shape[:2], self.n_head, -1]), (0, 2, 3, 1)) * scale
        v = paddle.transpose(
-            v.view(*v.shape[:2], self.n_head, -1), (0, 2, 1, 3))
+            v.reshape([*v.shape[:2], self.n_head, -1]), (0, 2, 1, 3))

        qk = q @ k
        if mask is not None:
@@ -823,7 +823,7 @@ class BeamSearchDecoder(TokenDecoder):
        if self.finished_sequences is None:  # for the first update
            self.finished_sequences = [{} for _ in range(batch_size)]

-        logprobs = F.log_softmax(logits, axis=-1, dtype=paddle.float32)
+        logprobs = F.log_softmax(logits, axis=-1, dtype='float32')
        next_tokens, source_indices, finished_sequences = [], [], []
        for i in range(batch_size):
            scores, sources, finished = {}, {}, {}
@@ -969,7 +969,7 @@ class ApplyTimestampRules(LogitFilter):
            logits[:, last_allowed + 1:] = -np.inf

        # if sum of probability over timestamps is above any other token, sample timestamp
-        logprobs = F.log_softmax(logits, axis=-1, dtype=paddle.float32)
+        logprobs = F.log_softmax(logits, axis=-1, dtype='float32')
        for k in range(tokens.shape[0]):
            # When using paddle.logsumexp on a 32GB Tesla-V100 GPU, we encountered CUDA error 700. 
            # To bypass this issue in CI, we have decomposed the operation into separate steps. 
--- a/runtime/examples/text_lm/utils
+++ b/runtime/examples/text_lm/utils
@@ -1 +1 @@
-../../../utils/
+../../../utils/
--- a/runtime/examples/u2pp_ol/wenetspeech/utils
+++ b/runtime/examples/u2pp_ol/wenetspeech/utils
@@ -1 +1 @@
-../../../../utils/
+../../../../utils/