From 5b3612f27300fa5e7a2bc9e62cd85ba6c5c8c5b1 Mon Sep 17 00:00:00 2001 From: yinfan98 <1106310035@qq.com> Date: Mon, 2 Dec 2024 11:05:49 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90Hackathon=207th=E3=80=91fix=20whisper?= =?UTF-8?q?=20at=20Paddle=203.0=20(#3880)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix whisper at Paddle 3.0 * fix whisper at Paddle 3.0 * fix whisper at Paddle 3.0 * fix lint * fix * fix whisper ci * Update TTSCppFrontend * Update utils * Update steps * Update utils * Update __init__.py * Update whisper.py * Update utils * Update utils --- demos/TTSArmLinux/src/TTSCppFrontend | 2 +- examples/aishell/asr0/utils | 2 +- examples/librispeech/asr2/steps | 2 +- examples/voxceleb/sv0/utils | 2 +- paddlespeech/s2t/models/whisper/whisper.py | 10 +++++----- runtime/examples/text_lm/utils | 2 +- runtime/examples/u2pp_ol/wenetspeech/utils | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/demos/TTSArmLinux/src/TTSCppFrontend b/demos/TTSArmLinux/src/TTSCppFrontend index 25953976..82098540 120000 --- a/demos/TTSArmLinux/src/TTSCppFrontend +++ b/demos/TTSArmLinux/src/TTSCppFrontend @@ -1 +1 @@ -../../TTSCppFrontend/ \ No newline at end of file +../../TTSCppFrontend/ diff --git a/examples/aishell/asr0/utils b/examples/aishell/asr0/utils index 256f914a..94d118d2 120000 --- a/examples/aishell/asr0/utils +++ b/examples/aishell/asr0/utils @@ -1 +1 @@ -../../../utils/ \ No newline at end of file +../../../utils/ diff --git a/examples/librispeech/asr2/steps b/examples/librispeech/asr2/steps index 995eeccb..7cb6e568 120000 --- a/examples/librispeech/asr2/steps +++ b/examples/librispeech/asr2/steps @@ -1 +1 @@ -../../../tools/kaldi/egs/wsj/s5/steps/ \ No newline at end of file +../../../tools/kaldi/egs/wsj/s5/steps/ diff --git a/examples/voxceleb/sv0/utils b/examples/voxceleb/sv0/utils index 256f914a..94d118d2 120000 --- a/examples/voxceleb/sv0/utils +++ b/examples/voxceleb/sv0/utils @@ -1 +1 @@ -../../../utils/ \ No newline at end of file +../../../utils/ diff --git a/paddlespeech/s2t/models/whisper/whisper.py b/paddlespeech/s2t/models/whisper/whisper.py index 9925e7cd..d20cc04b 100644 --- a/paddlespeech/s2t/models/whisper/whisper.py +++ b/paddlespeech/s2t/models/whisper/whisper.py @@ -109,11 +109,11 @@ class MultiHeadAttention(nn.Layer): n_batch, n_ctx, n_state = q.shape scale = (n_state // self.n_head)**-0.25 q = paddle.transpose( - q.view(*q.shape[:2], self.n_head, -1), (0, 2, 1, 3)) * scale + q.reshape([*q.shape[:2], self.n_head, -1]), (0, 2, 1, 3)) * scale k = paddle.transpose( - k.view(*k.shape[:2], self.n_head, -1), (0, 2, 3, 1)) * scale + k.reshape([*k.shape[:2], self.n_head, -1]), (0, 2, 3, 1)) * scale v = paddle.transpose( - v.view(*v.shape[:2], self.n_head, -1), (0, 2, 1, 3)) + v.reshape([*v.shape[:2], self.n_head, -1]), (0, 2, 1, 3)) qk = q @ k if mask is not None: @@ -823,7 +823,7 @@ class BeamSearchDecoder(TokenDecoder): if self.finished_sequences is None: # for the first update self.finished_sequences = [{} for _ in range(batch_size)] - logprobs = F.log_softmax(logits, axis=-1, dtype=paddle.float32) + logprobs = F.log_softmax(logits, axis=-1, dtype='float32') next_tokens, source_indices, finished_sequences = [], [], [] for i in range(batch_size): scores, sources, finished = {}, {}, {} @@ -969,7 +969,7 @@ class ApplyTimestampRules(LogitFilter): logits[:, last_allowed + 1:] = -np.inf # if sum of probability over timestamps is above any other token, sample timestamp - logprobs = F.log_softmax(logits, axis=-1, dtype=paddle.float32) + logprobs = F.log_softmax(logits, axis=-1, dtype='float32') for k in range(tokens.shape[0]): # When using paddle.logsumexp on a 32GB Tesla-V100 GPU, we encountered CUDA error 700. # To bypass this issue in CI, we have decomposed the operation into separate steps. diff --git a/runtime/examples/text_lm/utils b/runtime/examples/text_lm/utils index 256f914a..94d118d2 120000 --- a/runtime/examples/text_lm/utils +++ b/runtime/examples/text_lm/utils @@ -1 +1 @@ -../../../utils/ \ No newline at end of file +../../../utils/ diff --git a/runtime/examples/u2pp_ol/wenetspeech/utils b/runtime/examples/u2pp_ol/wenetspeech/utils index c2519a9d..758320d4 120000 --- a/runtime/examples/u2pp_ol/wenetspeech/utils +++ b/runtime/examples/u2pp_ol/wenetspeech/utils @@ -1 +1 @@ -../../../../utils/ \ No newline at end of file +../../../../utils/