diff --git a/paddlespeech/s2t/models/whisper/__init__.py b/paddlespeech/s2t/models/whisper/__init__.py index b78dece8a..3f371c1e5 100644 --- a/paddlespeech/s2t/models/whisper/__init__.py +++ b/paddlespeech/s2t/models/whisper/__init__.py @@ -2,11 +2,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Modified from OpenAI Whisper 2022 (https://github.com/openai/whisper/whisper/__init__.py) -from paddlespeech.s2t.models.whisper.whipser import decode -from paddlespeech.s2t.models.whisper.whipser import DecodingOptions -from paddlespeech.s2t.models.whisper.whipser import DecodingResult -from paddlespeech.s2t.models.whisper.whipser import detect_language -from paddlespeech.s2t.models.whisper.whipser import log_mel_spectrogram -from paddlespeech.s2t.models.whisper.whipser import ModelDimensions -from paddlespeech.s2t.models.whisper.whipser import transcribe -from paddlespeech.s2t.models.whisper.whipser import Whisper +from paddlespeech.s2t.models.whisper.whisper import decode +from paddlespeech.s2t.models.whisper.whisper import DecodingOptions +from paddlespeech.s2t.models.whisper.whisper import DecodingResult +from paddlespeech.s2t.models.whisper.whisper import detect_language +from paddlespeech.s2t.models.whisper.whisper import log_mel_spectrogram +from paddlespeech.s2t.models.whisper.whisper import ModelDimensions +from paddlespeech.s2t.models.whisper.whisper import transcribe +from paddlespeech.s2t.models.whisper.whisper import Whisper diff --git a/paddlespeech/s2t/models/whisper/whipser.py b/paddlespeech/s2t/models/whisper/whisper.py similarity index 99% rename from paddlespeech/s2t/models/whisper/whipser.py rename to paddlespeech/s2t/models/whisper/whisper.py index a28013e4b..bb5ec8488 100644 --- a/paddlespeech/s2t/models/whisper/whipser.py +++ b/paddlespeech/s2t/models/whisper/whisper.py @@ -17,11 +17,12 @@ from typing import Union import numpy as np import paddle import paddle.nn.functional as F -import paddlespeech.s2t.modules.align as paddlespeech_nn import soundfile import tqdm from paddle import nn from paddle.distribution import Categorical + +import paddlespeech.s2t.modules.align as paddlespeech_nn from paddlespeech.s2t.models.whisper import utils from paddlespeech.s2t.models.whisper.tokenizer import get_tokenizer from paddlespeech.s2t.models.whisper.tokenizer import LANGUAGES @@ -475,8 +476,8 @@ def transcribe( if dtype == np.float32: decode_options["fp16"] = False - if decode_options.get( - "language") == 'None' or decode_options.get("language", None) is None: + if decode_options.get("language") == 'None' or decode_options.get( + "language", None) is None: if not model.is_multilingual: decode_options["language"] = "en" else: @@ -1206,8 +1207,9 @@ class DecodingTask: DecodingResult( audio_features=features, language=language, - language_probs=probs) for features, language, probs in - zip(audio_features, languages, language_probs) + language_probs=probs) + for features, language, probs in zip(audio_features, languages, + language_probs) ] # repeat the audio & text tensors by the group size, for beam search or best-of-n sampling