diff --git a/speechserving/speechserving/conf/tts/tts_pd.yaml b/speechserving/speechserving/conf/tts/tts_pd.yaml
index e34cb5ab..eebd1f12 100644
--- a/speechserving/speechserving/conf/tts/tts_pd.yaml
+++ b/speechserving/speechserving/conf/tts/tts_pd.yaml
@@ -14,7 +14,7 @@ port: 8692
 am: 'fastspeech2_csmsc'
 am_model: # the pdmodel file of am static model
 am_params: # the pdiparams file of am static model
-sample_rate: 24000
+am_sample_rate: 24000
 phones_dict:
 tones_dict:
 speaker_dict:
@@ -33,6 +33,7 @@ am_predictor_conf:
 voc: 'pwgan_csmsc'
 voc_model: # the pdmodel file of vocoder static model
 voc_params: # the pdiparams file of vocoder static model
+voc_sample_rate: 24000
 voc_predictor_conf:
     use_gpu: True
 
diff --git a/speechserving/speechserving/engine/tts/paddleinference/tts_engine.py b/speechserving/speechserving/engine/tts/paddleinference/tts_engine.py
index 40bbcbf4..5a447c08 100644
--- a/speechserving/speechserving/engine/tts/paddleinference/tts_engine.py
+++ b/speechserving/speechserving/engine/tts/paddleinference/tts_engine.py
@@ -83,6 +83,8 @@ pretrained_models = {
         'pwgan_csmsc.pdmodel',
         'params':
         'pwgan_csmsc.pdiparams',
+        'sample_rate':
+        24000,
     },
     # mb_melgan
     "mb_melgan_csmsc-zh": {
@@ -94,6 +96,8 @@ pretrained_models = {
         'mb_melgan_csmsc.pdmodel',
         'params':
         'mb_melgan_csmsc.pdiparams',
+        'sample_rate':
+        24000,
     },
     # hifigan
     "hifigan_csmsc-zh": {
@@ -105,6 +109,8 @@ pretrained_models = {
         'hifigan_csmsc.pdmodel',
         'params':
         'hifigan_csmsc.pdiparams',
+        'sample_rate':
+        24000,
     },
 }
 
@@ -141,13 +147,14 @@ class TTSServerExecutor(TTSExecutor):
             am: str='fastspeech2_csmsc',
             am_model: Optional[os.PathLike]=None,
             am_params: Optional[os.PathLike]=None,
-            sample_rate: int=24000,
+            am_sample_rate: int=24000,
             phones_dict: Optional[os.PathLike]=None,
             tones_dict: Optional[os.PathLike]=None,
             speaker_dict: Optional[os.PathLike]=None,
             voc: str='pwgan_csmsc',
             voc_model: Optional[os.PathLike]=None,
             voc_params: Optional[os.PathLike]=None,
+            voc_sample_rate: int=24000,
             lang: str='zh',
             am_predictor_conf: dict=None,
             voc_predictor_conf: dict=None, ):
@@ -169,7 +176,7 @@ class TTSServerExecutor(TTSExecutor):
             # must have phones_dict in acoustic
             self.phones_dict = os.path.join(
                 am_res_path, pretrained_models[am_tag]['phones_dict'])
-            self.sample_rate = pretrained_models[am_tag]['sample_rate']
+            self.am_sample_rate = pretrained_models[am_tag]['sample_rate']
 
             logger.info(am_res_path)
             logger.info(self.am_model)
@@ -178,7 +185,7 @@ class TTSServerExecutor(TTSExecutor):
             self.am_model = os.path.abspath(am_model)
             self.am_params = os.path.abspath(am_params)
             self.phones_dict = os.path.abspath(phones_dict)
-            self.sample_rate = sample_rate
+            self.am_sample_rate = am_sample_rate
             self.am_res_path = os.path.dirname(os.path.abspath(self.am_model))
 
         print("self.phones_dict:", self.phones_dict)
@@ -207,14 +214,17 @@ class TTSServerExecutor(TTSExecutor):
                                           pretrained_models[voc_tag]['model'])
             self.voc_params = os.path.join(voc_res_path,
                                            pretrained_models[voc_tag]['params'])
+            self.voc_sample_rate = pretrained_models[voc_tag]['sample_rate']
             logger.info(voc_res_path)
             logger.info(self.voc_model)
             logger.info(self.voc_params)
         else:
             self.voc_model = os.path.abspath(voc_model)
             self.voc_params = os.path.abspath(voc_params)
+            self.voc_sample_rate = voc_sample_rate
             self.voc_res_path = os.path.dirname(os.path.abspath(self.voc_model))
 
+        assert (self.voc_sample_rate == self.am_sample_rate)
         # Init body.
         with open(self.phones_dict, "r") as f:
             phn_id = [line.strip().split() for line in f.readlines()]
@@ -343,13 +353,14 @@ class TTSEngine(BaseEngine):
             am=self.conf_dict["am"],
             am_model=self.conf_dict["am_model"],
             am_params=self.conf_dict["am_params"],
-            sample_rate=self.conf_dict["sample_rate"],
+            am_sample_rate=self.conf_dict["am_sample_rate"],
             phones_dict=self.conf_dict["phones_dict"],
             tones_dict=self.conf_dict["tones_dict"],
             speaker_dict=self.conf_dict["speaker_dict"],
             voc=self.conf_dict["voc"],
             voc_model=self.conf_dict["voc_model"],
             voc_params=self.conf_dict["voc_params"],
+            voc_sample_rate=self.conf_dict["voc_sample_rate"],
             lang=self.conf_dict["lang"],
             am_predictor_conf=self.conf_dict["am_predictor_conf"],
             voc_predictor_conf=self.conf_dict["voc_predictor_conf"], )
@@ -451,7 +462,7 @@ class TTSEngine(BaseEngine):
         try:
             target_sample_rate, wav_base64 = self.postprocess(
                 wav=self.executor._outputs['wav'].numpy(),
-                original_fs=self.executor.sample_rate,
+                original_fs=self.executor.am_sample_rate,
                 target_fs=sample_rate,
                 volume=volume,
                 speed=speed,
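
The sketch below is illustrative and not part of the patch. It shows the config contract this change introduces: tts_pd.yaml now carries separate am_sample_rate and voc_sample_rate keys (replacing the single sample_rate), and the engine asserts they match before synthesis. The inline YAML snippet and the PyYAML call are assumptions for the example only.

# Illustrative sketch (not part of the patch): the per-model sample-rate keys
# must agree, mirroring `assert (self.voc_sample_rate == self.am_sample_rate)` above.
import yaml  # PyYAML, assumed available

conf_text = """
am: 'fastspeech2_csmsc'
am_sample_rate: 24000     # renamed from `sample_rate`
voc: 'pwgan_csmsc'
voc_sample_rate: 24000    # new key; must equal am_sample_rate
"""

conf_dict = yaml.safe_load(conf_text)
assert conf_dict["voc_sample_rate"] == conf_dict["am_sample_rate"], (
    "acoustic model and vocoder must use the same sample rate")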