diff --git a/.gitignore b/.gitignore index 4a0c43312..d31cfc06c 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ build *output/ .history +.idea audio/dist/ audio/fc_patch/ @@ -51,3 +52,5 @@ tools/onnx-simplifier/ speechx/fc_patch/ third_party/ctc_decoders/paddlespeech_ctcdecoders.py + +kernel_meta/ diff --git a/paddlespeech/audio/compliance/kaldi.py b/paddlespeech/audio/compliance/kaldi.py index a94ec4053..f15fdfd5d 100644 --- a/paddlespeech/audio/compliance/kaldi.py +++ b/paddlespeech/audio/compliance/kaldi.py @@ -167,10 +167,16 @@ def _get_window(waveform: Tensor, energy_floor) # (m) if preemphasis_coefficient != 0.0: + # npu only supports mode=constant right now + if paddle.get_device().startswith('npu'): + mode = 'constant' + else: + mode = 'replicate' + offset_strided_input = paddle.nn.functional.pad( strided_input.unsqueeze(0), (1, 0), data_format='NCL', - mode='replicate').squeeze(0) # (m, window_size + 1) + mode=mode).squeeze(0) # (m, window_size + 1) strided_input = strided_input - preemphasis_coefficient * offset_strided_input[:, : -1] diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index 54780fdd2..e1be8bad0 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -144,6 +144,12 @@ class CLSExecutor(BaseExecutor): if isinstance(audio_file, (str, os.PathLike)): logger.debug("Preprocessing audio_file:" + audio_file) + # set 'pad_mode' to 'constant' when the device is npu, otherwise keep the default 'pad_mode' value + if paddle.get_device().startswith('npu'): + pad_mode_kwarg = {"pad_mode": "constant"} + else: + pad_mode_kwarg = {} + # Feature extraction feature_extractor = LogMelSpectrogram( sr=feat_conf['sample_rate'], @@ -153,7 +159,9 @@ class CLSExecutor(BaseExecutor): win_length=feat_conf['window_length'], f_min=feat_conf['f_min'], f_max=feat_conf['f_max'], - n_mels=feat_conf['n_mels'], ) + n_mels=feat_conf['n_mels'], + **pad_mode_kwarg, + ) feats = feature_extractor ( 
paddle.to_tensor(paddle.to_tensor(waveform).unsqueeze(0))) self._inputs['feats'] = paddle.transpose(feats, [0, 2, 1]).unsqueeze( diff --git a/paddlespeech/t2s/exps/syn_utils.py b/paddlespeech/t2s/exps/syn_utils.py index acfaa012d..53c88d6d0 100644 --- a/paddlespeech/t2s/exps/syn_utils.py +++ b/paddlespeech/t2s/exps/syn_utils.py @@ -451,12 +451,25 @@ def get_voc_inference( voc_name = voc[:voc.rindex('_')] voc_class = dynamic_import(voc_name, model_alias) voc_inference_class = dynamic_import(voc_name + '_inference', model_alias) + + # npu only supports mode=constant right now + # this code has been adapted to support 'paddlespeech.t2s.models.melgan.melgan.MelGANGenerator' + npu_pad_mode = {"mode": "constant"} if paddle.get_device().startswith('npu') else {} + if voc_name != 'wavernn': + if npu_pad_mode: + voc_config["generator_params"].setdefault("pad_params", {}) + voc_config["generator_params"]["pad_params"].update(npu_pad_mode) + voc = voc_class(**voc_config["generator_params"]) voc.set_state_dict(paddle.load(voc_ckpt)["generator_params"]) voc.remove_weight_norm() voc.eval() else: + if npu_pad_mode: + voc_config["model"].setdefault("pad_params", {}) + voc_config["model"]["pad_params"].update(npu_pad_mode) + voc = voc_class(**voc_config["model"]) voc.set_state_dict(paddle.load(voc_ckpt)["main_params"]) voc.eval()