diff --git a/demos/whisper/README.md b/demos/whisper/README.md
index d332fca13..455bca92b 100644
--- a/demos/whisper/README.md
+++ b/demos/whisper/README.md
@@ -65,7 +65,7 @@ Whisper model trained by OpenAI whisper https://github.com/openai/whisper
         print('ASR Result: \n{}'.format(text))
 
         # to recognize text and translate to English
-        feature = ssl_executor(
+        feature = whisper_executor(
             model='whisper-large',
             task='translate',
             sample_rate=16000,
@@ -86,4 +86,4 @@ Whisper model trained by OpenAI whisper https://github.com/openai/whisper
     Translate Result:
     Detected language: Chinese
     [00:00.000 --> 00:05.000]  I think the most important thing about running is that it brings me good health.
-    {'text': ' I think the most important thing about running is that it brings me good health.', 'segments': [{'id': 0, 'seek': 0, 'start': 0.0, 'end': 5.0, 'text': ' I think the most important thing about running is that it brings me good health.', 'tokens': [50364, 286, 519, 264, 881, 1021, 551, 466, 2614, 307, 300, 309, 5607, 385, 665, 1585, 13, 50614], 'temperature': 0.0, 'avg_logprob': -0.47945233395225123, 'compression_ratio': 1.095890410958904, 'no_speech_prob': 0.028302080929279327}], 'language': 'zh'}
\ No newline at end of file
+    {'text': ' I think the most important thing about running is that it brings me good health.', 'segments': [{'id': 0, 'seek': 0, 'start': 0.0, 'end': 5.0, 'text': ' I think the most important thing about running is that it brings me good health.', 'tokens': [50364, 286, 519, 264, 881, 1021, 551, 466, 2614, 307, 300, 309, 5607, 385, 665, 1585, 13, 50614], 'temperature': 0.0, 'avg_logprob': -0.47945233395225123, 'compression_ratio': 1.095890410958904, 'no_speech_prob': 0.028302080929279327}], 'language': 'zh'}
diff --git a/demos/whisper/README_cn.md b/demos/whisper/README_cn.md
index e559ba041..784952761 100644
--- a/demos/whisper/README_cn.md
+++ b/demos/whisper/README_cn.md
@@ -66,7 +66,7 @@ Whisper model trained by OpenAI Whisper https://github.com/openai/whisper
         print('ASR Result: \n{}'.format(text))
 
         # to translate the speech into English
-        feature = ssl_executor(
+        feature = whisper_executor(
             model='whisper-large',
             task='translate',
             sample_rate=16000,
@@ -88,4 +88,4 @@ Whisper model trained by OpenAI Whisper https://github.com/openai/whisper
     Translate Result:
     Detected language: Chinese
     [00:00.000 --> 00:05.000]  I think the most important thing about running is that it brings me good health.
-    {'text': ' I think the most important thing about running is that it brings me good health.', 'segments': [{'id': 0, 'seek': 0, 'start': 0.0, 'end': 5.0, 'text': ' I think the most important thing about running is that it brings me good health.', 'tokens': [50364, 286, 519, 264, 881, 1021, 551, 466, 2614, 307, 300, 309, 5607, 385, 665, 1585, 13, 50614], 'temperature': 0.0, 'avg_logprob': -0.47945233395225123, 'compression_ratio': 1.095890410958904, 'no_speech_prob': 0.028302080929279327}], 'language': 'zh'}
\ No newline at end of file
+    {'text': ' I think the most important thing about running is that it brings me good health.', 'segments': [{'id': 0, 'seek': 0, 'start': 0.0, 'end': 5.0, 'text': ' I think the most important thing about running is that it brings me good health.', 'tokens': [50364, 286, 519, 264, 881, 1021, 551, 466, 2614, 307, 300, 309, 5607, 385, 665, 1585, 13, 50614], 'temperature': 0.0, 'avg_logprob': -0.47945233395225123, 'compression_ratio': 1.095890410958904, 'no_speech_prob': 0.028302080929279327}], 'language': 'zh'}
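The rename in both READMEs fixes a copy-paste leftover: the snippet constructs a `WhisperExecutor` but then called `ssl_executor`. For reference, here is a minimal sketch of the corrected call as it would run end to end. Only `model`, `task`, and `sample_rate` appear in the hunks above; the `config`, `ckpt_path`, `audio_file`, and `device` arguments are assumed from the surrounding demo code, not shown in this diff:

```python
import paddle
from paddlespeech.cli.whisper import WhisperExecutor

whisper_executor = WhisperExecutor()

# Translate 16 kHz Mandarin speech to English text; zh.wav is the sample
# downloaded by run.sh below.
feature = whisper_executor(
    model='whisper-large',
    task='translate',
    sample_rate=16000,
    config=None,     # assumed: None falls back to the bundled pretrained config
    ckpt_path=None,  # assumed: None downloads the pretrained checkpoint
    audio_file='./zh.wav',
    device=paddle.get_device())
print('Translate Result: \n{}'.format(feature))
```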
diff --git a/demos/whisper/run.sh b/demos/whisper/run.sh
index 095743bb0..1d758108d 100644
--- a/demos/whisper/run.sh
+++ b/demos/whisper/run.sh
@@ -6,5 +6,5 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
 # to recognize text
 paddlespeech whisper --task transcribe --input ./zh.wav
 
-# to recognize text and transcribe to English
+# to recognize text and translate to English
 paddlespeech whisper --task translate --input ./zh.wav
\ No newline at end of file
diff --git a/paddlespeech/cli/whisper/infer.py b/paddlespeech/cli/whisper/infer.py
index ada888bfc..3b1771b2d 100644
--- a/paddlespeech/cli/whisper/infer.py
+++ b/paddlespeech/cli/whisper/infer.py
@@ -181,18 +181,13 @@ class WhisperExecutor(BaseExecutor):
                     resource_md5)
         else:
             raise Exception("wrong type")
-        #model_name = model_type
-        #model_class = self.task_resource.get_model_class(model_name)
-        #model_conf = self.config
-        #model = model_class.from_config(model_conf)
-        #self.model = model
-        #self.model.eval()
 
         # load model
         model_dict = paddle.load(self.ckpt_path)
         dims = ModelDimensions(**model_dict["dims"])
         self.model = Whisper(dims)
         self.model.load_dict(model_dict)
+        self.model.eval()
 
         #set task
         if task is not None:
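The `infer.py` hunk deletes a block of dead, commented-out loading code and restores the `self.model.eval()` call that had been lost along with it. Paddle layers are constructed in training mode, so without `eval()` dropout stays active at inference time and degrades recognition quality. Stripped of executor state, the loading sequence now amounts to the following sketch; the checkpoint path here is hypothetical, since the executor resolves the real one from its resource registry:

```python
import paddle
from paddlespeech.s2t.models.whisper import ModelDimensions, Whisper

ckpt_path = './whisper-large-model.pdparams'  # hypothetical path, for illustration

model_dict = paddle.load(ckpt_path)           # dict holding "dims" plus the weights
dims = ModelDimensions(**model_dict["dims"])  # architecture hyperparameters
model = Whisper(dims)                         # build the layer tree from dims
model.load_dict(model_dict)                   # load the weights
model.eval()                                  # switch to inference mode (no dropout)
```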
diff --git a/paddlespeech/s2t/models/whisper/__init__.py b/paddlespeech/s2t/models/whisper/__init__.py
index 1c8adba56..98ab23610 100644
--- a/paddlespeech/s2t/models/whisper/__init__.py
+++ b/paddlespeech/s2t/models/whisper/__init__.py
@@ -2,4 +2,11 @@
 # Copyright (c) 2022 PaddlePaddle Authors and . All Rights Reserved.
 #
 # Modified from OpenAI Whisper 2022 (https://github.com/openai/whisper/whisper/__init__.py)
-from .whipser import *
+from paddlespeech.s2t.models.whisper.whipser import decode
+from paddlespeech.s2t.models.whisper.whipser import DecodingOptions
+from paddlespeech.s2t.models.whisper.whipser import DecodingResult
+from paddlespeech.s2t.models.whisper.whipser import detect_language
+from paddlespeech.s2t.models.whisper.whipser import log_mel_spectrogram
+from paddlespeech.s2t.models.whisper.whipser import ModelDimensions
+from paddlespeech.s2t.models.whisper.whipser import transcribe
+from paddlespeech.s2t.models.whisper.whipser import Whisper
diff --git a/paddlespeech/s2t/models/whisper/whipser.py b/paddlespeech/s2t/models/whisper/whipser.py
index 5a495f634..853e11b85 100644
--- a/paddlespeech/s2t/models/whisper/whipser.py
+++ b/paddlespeech/s2t/models/whisper/whipser.py
@@ -194,7 +194,7 @@ class AudioEncoder(nn.Layer):
     def forward(self, x: paddle.Tensor):
         """
-        x : torch.Tensor, shape = (batch_size, n_mels, n_ctx)
+        x : paddle.Tensor, shape = (batch_size, n_mels, n_ctx)
             the mel spectrogram of the audio
         """
         x = F.gelu(self.conv1(x))
@@ -241,9 +241,9 @@ class TextDecoder(nn.Layer):
                 xa: paddle.Tensor,
                 kv_cache: Optional[dict]=None):
         """
-        x : torch.LongTensor, shape = (batch_size, <= n_ctx)
+        x : paddle.LongTensor, shape = (batch_size, <= n_ctx)
             the text tokens
-        xa : torch.Tensor, shape = (batch_size, n_mels, n_audio_ctx)
+        xa : paddle.Tensor, shape = (batch_size, n_mels, n_audio_ctx)
             the encoded audio features to be attended on
         """
         offset = next(iter(kv_cache.values())).shape[1] if kv_cache else 0
@@ -436,7 +436,7 @@ def transcribe(
     model: Whisper
         The Whisper model instance
 
-    mel: torch.Tensor
+    mel: paddle.Tensor
         The audio feature
 
     verbose: bool
@@ -1264,7 +1264,7 @@ def decode(model: "Whisper",
     model: Whisper
         the Whisper model instance
 
-    mel: torch.Tensor, shape = (80, 3000) or (*, 80, 3000)
+    mel: paddle.Tensor, shape = (80, 3000) or (*, 80, 3000)
        A tensor containing the Mel spectrogram(s)
 
     options: DecodingOptions
@@ -1331,7 +1331,7 @@ class Whisper(nn.Layer):
         Returns
         -------
-        cache : Dict[nn.Module, torch.Tensor]
+        cache : Dict[nn.Layer, paddle.Tensor]
             A dictionary object mapping the key/value projection modules to its cache
         hooks : List[RemovableHandle]
             List of PyTorch RemovableHandle objects to stop the hooks to be called
@@ -1429,7 +1429,7 @@ def log_mel_spectrogram(audio: Union[str, np.ndarray, paddle.Tensor],
     Parameters
     ----------
-    audio: Union[str, np.ndarray, torch.Tensor], shape = (*)
+    audio: Union[str, np.ndarray, paddle.Tensor], shape = (*)
         The path to audio or either a NumPy array or Tensor containing the audio waveform in 16 kHz
 
     n_mels: int
@@ -1437,7 +1437,7 @@ def log_mel_spectrogram(audio: Union[str, np.ndarray, paddle.Tensor],
     Returns
     -------
-    torch.Tensor, shape = (80, n_frames)
+    paddle.Tensor, shape = (80, n_frames)
        A Tensor that contains the Mel spectrogram
    """
    if not paddle.is_tensor(audio):
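Replacing the wildcard import pins down the module's public surface: consumers now see exactly eight names, and tooling can trace where `Whisper` or `transcribe` comes from. (The module file itself is named `whipser.py`, a pre-existing typo in the repo that this patch deliberately leaves alone, since renaming it would touch every import site.) Based only on the docstrings corrected above, the exports compose roughly as follows. Treat this as a shape-level sketch: PaddleSpeech's port may require extra arguments that this diff does not show, such as a resource path for tokenizer and mel-filter assets.

```python
from paddlespeech.s2t.models.whisper import (DecodingOptions, decode,
                                             log_mel_spectrogram)

# Per the corrected docstring, log_mel_spectrogram accepts a path, a NumPy
# array, or a paddle.Tensor holding a 16 kHz waveform and returns a
# paddle.Tensor of shape (80, n_frames). (This port may additionally need a
# resource path for the mel filter assets -- an assumption, not shown here.)
mel = log_mel_spectrogram('./zh.wav')

# decode expects a segment of shape (80, 3000) or (*, 80, 3000) plus options;
# `model` would be a Whisper instance loaded as in infer.py above.
# result = decode(model, mel_segment, DecodingOptions(task='transcribe'))
```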