diff --git a/demos/streaming_tts_server/README.md b/demos/streaming_tts_server/README.md
index 860d9a97..cbea6bf7 100644
--- a/demos/streaming_tts_server/README.md
+++ b/demos/streaming_tts_server/README.md
@@ -119,12 +119,9 @@ The configuration file can be found in `conf/tts_online_application.yaml`.
   - `protocol`: Service protocol, choices: [http, websocket], default: http.
   - `input`: (required): Input text to generate.
   - `spk_id`: Speaker id for multi-speaker text to speech. Default: 0
-  - `speed`: Audio speed, the value should be set between 0 and 3. Default: 1.0
-  - `volume`: Audio volume, the value should be set between 0 and 3. Default: 1.0
-  - `sample_rate`: Sampling rate, choices: [0, 8000, 16000], the default is the same as the model. Default: 0
-  - `output`: Output wave filepath. Default: None, which means not to save the audio to the local.
+  - `output`: Output wave filepath on the client. Default: None, which means the audio is not saved locally.
   - `play`: Whether to play audio, play while synthesizing, default value: False, which means not playing. **Playing audio needs to rely on the pyaudio library**.
-  - `spk_id, speed, volume, sample_rate` do not take effect in streaming speech synthesis service temporarily.
+  - Currently, only single-speaker models are supported, so `spk_id` has no effect. The streaming TTS service does not support changing the sample rate, speed, or volume.

  Output:
  ```bash
@@ -150,9 +147,6 @@ The configuration file can be found in `conf/tts_online_application.yaml`.
         port=8092,
         protocol="http",
         spk_id=0,
-        speed=1.0,
-        volume=1.0,
-        sample_rate=0,
         output="./output.wav",
         play=False)

@@ -256,12 +250,10 @@ The configuration file can be found in `conf/tts_online_application.yaml`.
   - `protocol`: Service protocol, choices: [http, websocket], default: http.
   - `input`: (required): Input text to generate.
   - `spk_id`: Speaker id for multi-speaker text to speech. Default: 0
-  - `speed`: Audio speed, the value should be set between 0 and 3. Default: 1.0
-  - `volume`: Audio volume, the value should be set between 0 and 3. Default: 1.0
-  - `sample_rate`: Sampling rate, choices: [0, 8000, 16000], the default is the same as the model. Default: 0
-  - `output`: Output wave filepath. Default: None, which means not to save the audio to the local.
+  - `output`: Output wave filepath on the client. Default: None, which means the audio is not saved locally.
   - `play`: Whether to play audio, play while synthesizing, default value: False, which means not playing. **Playing audio needs to rely on the pyaudio library**.
-  - `spk_id, speed, volume, sample_rate` do not take effect in streaming speech synthesis service temporarily.
+  - Currently, only single-speaker models are supported, so `spk_id` has no effect. The streaming TTS service does not support changing the sample rate, speed, or volume.
+

  Output:

@@ -288,9 +280,6 @@ The configuration file can be found in `conf/tts_online_application.yaml`.
         port=8092,
         protocol="websocket",
         spk_id=0,
-        speed=1.0,
-        volume=1.0,
-        sample_rate=0,
         output="./output.wav",
         play=False)

diff --git a/demos/streaming_tts_server/README_cn.md b/demos/streaming_tts_server/README_cn.md
index 254ec26a..3cd28170 100644
--- a/demos/streaming_tts_server/README_cn.md
+++ b/demos/streaming_tts_server/README_cn.md
@@ -118,12 +118,9 @@
   - `protocol`: 服务协议,可选 [http, websocket], 默认: http。
   - `input`: (必须输入): 待合成的文本。
   - `spk_id`: 说话人 id,用于多说话人语音合成,默认值: 0。
-  - `speed`: 音频速度,该值应设置在 0 到 3 之间。 默认值:1.0
-  - `volume`: 音频音量,该值应设置在 0 到 3 之间。 默认值: 1.0
-  - `sample_rate`: 采样率,可选 [0, 8000, 16000],默认值:0,表示与模型采样率相同
-  - `output`: 输出音频的路径, 默认值:None,表示不保存音频到本地。
+  - `output`: 客户端输出音频的路径, 默认值:None,表示不保存音频。
   - `play`: 是否播放音频,边合成边播放, 默认值:False,表示不播放。**播放音频需要依赖pyaudio库**。
-  - `spk_id, speed, volume, sample_rate` 在流式语音合成服务中暂时不生效。
+  - 目前代码中只支持单说话人的模型,因此 spk_id 的选择并不生效。流式 TTS 不支持更换采样率,变速和变音量等功能。

  输出:

@@ -150,9 +147,6 @@
         port=8092,
         protocol="http",
         spk_id=0,
-        speed=1.0,
-        volume=1.0,
-        sample_rate=0,
         output="./output.wav",
         play=False)

@@ -256,12 +250,10 @@
   - `protocol`: 服务协议,可选 [http, websocket], 默认: http。
   - `input`: (必须输入): 待合成的文本。
   - `spk_id`: 说话人 id,用于多说话人语音合成,默认值: 0。
-  - `speed`: 音频速度,该值应设置在 0 到 3 之间。 默认值:1.0
-  - `volume`: 音频音量,该值应设置在 0 到 3 之间。 默认值: 1.0
-  - `sample_rate`: 采样率,可选 [0, 8000, 16000],默认值:0,表示与模型采样率相同
-  - `output`: 输出音频的路径, 默认值:None,表示不保存音频到本地。
+  - `output`: 客户端输出音频的路径, 默认值:None,表示不保存音频。
   - `play`: 是否播放音频,边合成边播放, 默认值:False,表示不播放。**播放音频需要依赖pyaudio库**。
-  - `spk_id, speed, volume, sample_rate` 在流式语音合成服务中暂时不生效。
+  - 目前代码中只支持单说话人的模型,因此 spk_id 的选择并不生效。流式 TTS 不支持更换采样率,变速和变音量等功能。
+

  输出:

@@ -288,9 +280,6 @@
         port=8092,
         protocol="websocket",
         spk_id=0,
-        speed=1.0,
-        volume=1.0,
-        sample_rate=0,
         output="./output.wav",
         play=False)

diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py
index bd1186df..e8e57fff 100644
--- a/paddlespeech/server/bin/paddlespeech_client.py
+++ b/paddlespeech/server/bin/paddlespeech_client.py
@@ -191,23 +191,10 @@ class TTSOnlineClientExecutor(BaseExecutor):
         self.parser.add_argument(
             '--spk_id', type=int, default=0, help='Speaker id')
         self.parser.add_argument(
-            '--speed',
-            type=float,
-            default=1.0,
-            help='Audio speed, the value should be set between 0 and 3')
-        self.parser.add_argument(
-            '--volume',
-            type=float,
-            default=1.0,
-            help='Audio volume, the value should be set between 0 and 3')
-        self.parser.add_argument(
-            '--sample_rate',
-            type=int,
-            default=0,
-            choices=[0, 8000, 16000],
-            help='Sampling rate, the default is the same as the model')
-        self.parser.add_argument(
-            '--output', type=str, default=None, help='Synthesized audio file')
+            '--output',
+            type=str,
+            default=None,
+            help='Path to save the synthesized audio on the client')
         self.parser.add_argument(
             "--play", type=bool, help="whether to play audio", default=False)

@@ -218,9 +205,6 @@ class TTSOnlineClientExecutor(BaseExecutor):
         port = args.port
         protocol = args.protocol
         spk_id = args.spk_id
-        speed = args.speed
-        volume = args.volume
-        sample_rate = args.sample_rate
         output = args.output
         play = args.play

@@ -231,9 +215,6 @@ class TTSOnlineClientExecutor(BaseExecutor):
                 port=port,
                 protocol=protocol,
                 spk_id=spk_id,
-                speed=speed,
-                volume=volume,
-                sample_rate=sample_rate,
                 output=output,
                 play=play)
             return True
@@ -249,9 +230,6 @@ class TTSOnlineClientExecutor(BaseExecutor):
                  port: int=8092,
                  protocol: str="http",
                  spk_id: int=0,
-                 speed: float=1.0,
-                 volume: float=1.0,
-                 sample_rate: int=0,
                  output: str=None,
                  play: bool=False):
         """
@@ -263,7 +241,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
             from paddlespeech.server.utils.audio_handler import TTSHttpHandler
             handler = TTSHttpHandler(server_ip, port, play)
             first_response, final_response, duration, save_audio_success, receive_time_list, chunk_duration_list = handler.run(
-                input, spk_id, speed, volume, sample_rate, output)
+                input, spk_id, output)

             delay_time_list = compute_delay(receive_time_list,
                                             chunk_duration_list)
@@ -273,7 +251,7 @@ class TTSOnlineClientExecutor(BaseExecutor):
             handler = TTSWsHandler(server_ip, port, play)
             loop = asyncio.get_event_loop()
             first_response, final_response, duration, save_audio_success, receive_time_list, chunk_duration_list = loop.run_until_complete(
-                handler.run(input, output))
+                handler.run(input, spk_id, output))

             delay_time_list = compute_delay(receive_time_list,
                                             chunk_duration_list)
diff --git a/paddlespeech/server/engine/tts/online/onnx/tts_engine.py b/paddlespeech/server/engine/tts/online/onnx/tts_engine.py
index 7b8e04e8..0995a55d 100644
--- a/paddlespeech/server/engine/tts/online/onnx/tts_engine.py
+++ b/paddlespeech/server/engine/tts/online/onnx/tts_engine.py
@@ -64,6 +64,7 @@ class TTSServerExecutor(TTSExecutor):
                 self, 'am_postnet_sess'))) and hasattr(self, 'voc_inference'):
             logger.debug('Models had been initialized.')
             return
+
         # am
         am_tag = am + '-' + lang
         if am == "fastspeech2_csmsc_onnx":
@@ -211,6 +212,8 @@ class TTSEngine(BaseEngine):
             self.config.voc_sample_rate == self.config.am_sample_rate
         ), "The sample rate of AM and Vocoder model are different, please check model."

+        self.sample_rate = self.config.voc_sample_rate
+
         try:
             if self.config.am_sess_conf.device is not None:
                 self.device = self.config.am_sess_conf.device
@@ -439,33 +442,13 @@ class PaddleTTSConnectionHandler:

         self.final_response_time = time.time() - frontend_st

-    def preprocess(self, text_bese64: str=None, text_bytes: bytes=None):
-        # Convert byte to text
-        if text_bese64:
-            text_bytes = base64.b64decode(text_bese64)  # base64 to bytes
-        text = text_bytes.decode('utf-8')  # bytes to text
-
-        return text
-
-    def run(self,
-            sentence: str,
-            spk_id: int=0,
-            speed: float=1.0,
-            volume: float=1.0,
-            sample_rate: int=0,
-            save_path: str=None):
+    def run(self, sentence: str, spk_id: int=0):
         """ run include inference and postprocess.

         Args:
             sentence (str): text to be synthesized
             spk_id (int, optional): speaker id for multi-speaker speech synthesis. Defaults to 0.
-            speed (float, optional): speed. Defaults to 1.0.
-            volume (float, optional): volume. Defaults to 1.0.
-            sample_rate (int, optional): target sample rate for synthesized audio,
-                0 means the same as the model sampling rate. Defaults to 0.
-            save_path (str, optional): The save path of the synthesized audio.
-                None means do not save audio. Defaults to None.
-
+
         Returns:
             wav_base64: The base64 format of the synthesized audio.
""" @@ -486,7 +469,7 @@ class PaddleTTSConnectionHandler: yield wav_base64 wav_all = np.concatenate(wav_list, axis=0) - duration = len(wav_all) / self.config.voc_sample_rate + duration = len(wav_all) / self.tts_engine.sample_rate logger.info(f"sentence: {sentence}") logger.info(f"The durations of audio is: {duration} s") logger.info(f"first response time: {self.first_response_time} s") diff --git a/paddlespeech/server/engine/tts/online/python/tts_engine.py b/paddlespeech/server/engine/tts/online/python/tts_engine.py index 9bd95849..a46b84bd 100644 --- a/paddlespeech/server/engine/tts/online/python/tts_engine.py +++ b/paddlespeech/server/engine/tts/online/python/tts_engine.py @@ -282,6 +282,12 @@ class TTSEngine(BaseEngine): logger.error(e) return False + assert ( + self.executor.am_config.fs == self.executor.voc_config.fs + ), "The sample rate of AM and Vocoder model are different, please check model." + + self.sample_rate = self.executor.am_config.fs + self.am_block = self.config.am_block self.am_pad = self.config.am_pad self.voc_block = self.config.voc_block @@ -465,32 +471,15 @@ class PaddleTTSConnectionHandler: self.final_response_time = time.time() - frontend_st - def preprocess(self, text_bese64: str=None, text_bytes: bytes=None): - # Convert byte to text - if text_bese64: - text_bytes = base64.b64decode(text_bese64) # base64 to bytes - text = text_bytes.decode('utf-8') # bytes to text - - return text - - def run(self, + def run( + self, sentence: str, - spk_id: int=0, - speed: float=1.0, - volume: float=1.0, - sample_rate: int=0, - save_path: str=None): + spk_id: int=0, ): """ run include inference and postprocess. Args: sentence (str): text to be synthesized spk_id (int, optional): speaker id for multi-speaker speech synthesis. Defaults to 0. - speed (float, optional): speed. Defaults to 1.0. - volume (float, optional): volume. Defaults to 1.0. - sample_rate (int, optional): target sample rate for synthesized audio, - 0 means the same as the model sampling rate. Defaults to 0. - save_path (str, optional): The save path of the synthesized audio. - None means do not save audio. Defaults to None. Returns: wav_base64: The base64 format of the synthesized audio. 
@@ -513,7 +502,7 @@ class PaddleTTSConnectionHandler:
                 yield wav_base64

         wav_all = np.concatenate(wav_list, axis=0)
-        duration = len(wav_all) / self.executor.am_config.fs
+        duration = len(wav_all) / self.tts_engine.sample_rate

         logger.info(f"sentence: {sentence}")
         logger.info(f"The durations of audio is: {duration} s")
diff --git a/paddlespeech/server/restful/tts_api.py b/paddlespeech/server/restful/tts_api.py
index 53fe159f..61e4c49f 100644
--- a/paddlespeech/server/restful/tts_api.py
+++ b/paddlespeech/server/restful/tts_api.py
@@ -140,7 +140,9 @@ def tts(request_body: TTSRequest):

 @router.post("/paddlespeech/tts/streaming")
 async def stream_tts(request_body: TTSRequest):
+    # get params
     text = request_body.text
+    spk_id = request_body.spk_id

     engine_pool = get_engine_pool()
     tts_engine = engine_pool['tts']
@@ -156,4 +158,24 @@

     connection_handler = PaddleTTSConnectionHandler(tts_engine)

-    return StreamingResponse(connection_handler.run(sentence=text))
+    return StreamingResponse(
+        connection_handler.run(sentence=text, spk_id=spk_id))
+
+
+@router.get("/paddlespeech/tts/streaming/samplerate")
+def get_samplerate():
+    try:
+        engine_pool = get_engine_pool()
+        tts_engine = engine_pool['tts']
+        logger.info("Get tts engine successfully.")
+        sample_rate = tts_engine.sample_rate
+
+        response = {"sample_rate": sample_rate}
+
+    except ServerBaseException as e:
+        response = failed_response(e.error_code, e.msg)
+    except BaseException:
+        response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
+        traceback.print_exc()
+
+    return response
diff --git a/paddlespeech/server/utils/audio_handler.py b/paddlespeech/server/utils/audio_handler.py
index d4540781..b5629037 100644
--- a/paddlespeech/server/utils/audio_handler.py
+++ b/paddlespeech/server/utils/audio_handler.py
@@ -266,6 +266,12 @@ class TTSWsHandler:
         self.url = "ws://" + self.server + ":" + str(
             self.port) + "/paddlespeech/tts/streaming"
         self.play = play
+
+        # get model sample rate
+        self.url_get_sr = "http://" + str(self.server) + ":" + str(
+            self.port) + "/paddlespeech/tts/streaming/samplerate"
+        self.sample_rate = requests.get(self.url_get_sr).json()["sample_rate"]
+
         if self.play:
             import pyaudio
             self.buffer = b''
@@ -273,7 +279,7 @@ class TTSWsHandler:
             self.stream = self.p.open(
                 format=self.p.get_format_from_width(2),
                 channels=1,
-                rate=24000,
+                rate=self.sample_rate,
                 output=True)
             self.mutex = threading.Lock()
             self.start_play = True
@@ -293,12 +299,13 @@ class TTSWsHandler:
             self.buffer = b''
             self.mutex.release()

-    async def run(self, text: str, output: str=None):
+    async def run(self, text: str, spk_id=0, output: str=None):
         """Send a text to online server

         Args:
             text (str): sentence to be synthesized
-            output (str): save audio path
+            spk_id (int, optional): speaker id. Defaults to 0.
+            output (str, optional): client save audio path. Defaults to None.
         """
         all_bytes = b''
         receive_time_list = []
@@ -315,8 +322,13 @@ class TTSWsHandler:
                 session = msg["session"]

                 # 3. send speech synthesis request
-                text_base64 = str(base64.b64encode((text).encode('utf-8')), "UTF8")
-                request = json.dumps({"text": text_base64})
+                #text_base64 = str(base64.b64encode((text).encode('utf-8')), "UTF8")
+                params = {
+                    "text": text,
+                    "spk_id": spk_id,
+                }
+
+                request = json.dumps(params)
                 st = time.time()
                 await ws.send(request)
                 logging.debug("send a message to the server")
@@ -341,10 +353,11 @@ class TTSWsHandler:
                 # Rerutn last packet normally, no audio information
                 elif status == 2:
                     final_response = time.time() - st
-                    duration = len(all_bytes) / 2.0 / 24000
+                    duration = len(all_bytes) / 2.0 / self.sample_rate

                     if output is not None:
-                        save_audio_success = save_audio(all_bytes, output)
+                        save_audio_success = save_audio(all_bytes, output,
+                                                        self.sample_rate)
                     else:
                         save_audio_success = False

@@ -362,7 +375,8 @@ class TTSWsHandler:
                     receive_time_list.append(time.time())
                     audio = message["audio"]
                     audio = base64.b64decode(audio)  # bytes
-                    chunk_duration_list.append(len(audio) / 2.0 / 24000)
+                    chunk_duration_list.append(
+                        len(audio) / 2.0 / self.sample_rate)
                     all_bytes += audio
                     if self.play:
                         self.mutex.acquire()
@@ -403,19 +417,26 @@ class TTSHttpHandler:
             self.port) + "/paddlespeech/tts/streaming"
         self.play = play

+        # get model sample rate
+        self.url_get_sr = "http://" + str(self.server) + ":" + str(
+            self.port) + "/paddlespeech/tts/streaming/samplerate"
+        self.sample_rate = requests.get(self.url_get_sr).json()["sample_rate"]
+
         if self.play:
             import pyaudio
             self.buffer = b''
             self.p = pyaudio.PyAudio()
+            self.start_play = True
+            self.max_fail = 50
+
             self.stream = self.p.open(
                 format=self.p.get_format_from_width(2),
                 channels=1,
-                rate=24000,
+                rate=self.sample_rate,
                 output=True)
             self.mutex = threading.Lock()
-            self.start_play = True
             self.t = threading.Thread(target=self.play_audio)
-            self.max_fail = 50
+
         logger.info(f"endpoint: {self.url}")

@@ -430,31 +451,19 @@ class TTSHttpHandler:
             self.buffer = b''
             self.mutex.release()

-    def run(self,
-            text: str,
-            spk_id=0,
-            speed=1.0,
-            volume=1.0,
-            sample_rate=0,
-            output: str=None):
+    def run(self, text: str, spk_id=0, output: str=None):
         """Send a text to tts online server

         Args:
             text (str): sentence to be synthesized.
             spk_id (int, optional): speaker id. Defaults to 0.
-            speed (float, optional): audio speed. Defaults to 1.0.
-            volume (float, optional): audio volume. Defaults to 1.0.
-            sample_rate (int, optional): audio sample rate, 0 means the same as model. Defaults to 0.
-            output (str, optional): save audio path. Defaults to None.
+            output (str, optional): client save audio path. Defaults to None.
         """
+        # 1. Create request
         params = {
             "text": text,
             "spk_id": spk_id,
-            "speed": speed,
-            "volume": volume,
-            "sample_rate": sample_rate,
-            "save_path": output
         }

         all_bytes = b''
@@ -482,14 +491,14 @@ class TTSHttpHandler:
                         self.t.start()
                         self.start_play = False
                 all_bytes += audio
-                chunk_duration_list.append(len(audio) / 2.0 / 24000)
+                chunk_duration_list.append(len(audio) / 2.0 / self.sample_rate)

         final_response = time.time() - st
-        duration = len(all_bytes) / 2.0 / 24000
+        duration = len(all_bytes) / 2.0 / self.sample_rate
         html.close()  # when stream=True

         if output is not None:
-            save_audio_success = save_audio(all_bytes, output)
+            save_audio_success = save_audio(all_bytes, output, self.sample_rate)
         else:
             save_audio_success = False

diff --git a/paddlespeech/server/ws/tts_api.py b/paddlespeech/server/ws/tts_api.py
index 3d8b222e..275711f5 100644
--- a/paddlespeech/server/ws/tts_api.py
+++ b/paddlespeech/server/ws/tts_api.py
@@ -87,12 +87,12 @@ async def websocket_endpoint(websocket: WebSocket):

             # speech synthesis request
             elif 'text' in message:
-                text_bese64 = message["text"]
-                sentence = connection_handler.preprocess(
-                    text_bese64=text_bese64)
+                text = message["text"]
+                spk_id = message["spk_id"]

                 # run
-                wav_generator = connection_handler.run(sentence)
+                wav_generator = connection_handler.run(
+                    sentence=text, spk_id=spk_id)

                 while True:
                     try:
@@ -116,3 +116,22 @@

     except Exception as e:
         logger.error(e)
+
+
+@router.get("/paddlespeech/tts/streaming/samplerate")
+def get_samplerate():
+    try:
+        engine_pool = get_engine_pool()
+        tts_engine = engine_pool['tts']
+        logger.info("Get tts engine successfully.")
+        sample_rate = tts_engine.sample_rate
+
+        response = {"sample_rate": sample_rate}
+
+    except ServerBaseException as e:
+        response = failed_response(e.error_code, e.msg)
+    except BaseException:
+        response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
+        traceback.print_exc()
+
+    return response
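
As a usage note, here is a minimal sketch of how a client can exercise the new `/paddlespeech/tts/streaming/samplerate` endpoint added in this patch. It assumes the streaming TTS server is already running locally on port 8092, as in the demo README; only the endpoint path and the `sample_rate` response key come from the patch itself.

```python
import requests

# The patch adds GET /paddlespeech/tts/streaming/samplerate, which reports the
# sample rate of the loaded AM/vocoder pair as {"sample_rate": <int>}.
resp = requests.get("http://127.0.0.1:8092/paddlespeech/tts/streaming/samplerate")
sample_rate = resp.json()["sample_rate"]
print(f"streaming TTS model sample rate: {sample_rate} Hz")
```

This is the same query that `TTSHttpHandler` and `TTSWsHandler` now perform in their constructors, so the client no longer hard-codes 24000 when computing durations or opening the pyaudio stream.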