Feat: npu supported for default model

pull/4084/head
yzz 3 months ago
parent 563217abb0
commit 3cb284f18c

3
.gitignore vendored

@ -16,6 +16,7 @@
build
*output/
.history
.idea
audio/dist/
audio/fc_patch/
@ -51,3 +52,5 @@ tools/onnx-simplifier/
speechx/fc_patch/
third_party/ctc_decoders/paddlespeech_ctcdecoders.py
kernel_meta/

@ -167,10 +167,16 @@ def _get_window(waveform: Tensor,
energy_floor) # (m)
if preemphasis_coefficient != 0.0:
# NPU currently supports only mode='constant'
if paddle.get_device().startswith('npu'):
mode = 'constant'
else:
mode = 'replicate'
offset_strided_input = paddle.nn.functional.pad(
strided_input.unsqueeze(0), (1, 0),
data_format='NCL',
mode='replicate').squeeze(0) # (m, window_size + 1)
mode=mode).squeeze(0) # (m, window_size + 1)
strided_input = strided_input - preemphasis_coefficient * offset_strided_input[:, :
-1]

@ -144,6 +144,12 @@ class CLSExecutor(BaseExecutor):
if isinstance(audio_file, (str, os.PathLike)):
logger.debug("Preprocessing audio_file:" + audio_file)
# Use pad_mode='constant' when the device is an NPU; otherwise keep the default 'pad_mode' value
if paddle.get_device().startswith('npu'):
pad_mode_kwarg = {"pad_mode": "constant"}
else:
pad_mode_kwarg = {}
# Feature extraction
feature_extractor = LogMelSpectrogram(
sr=feat_conf['sample_rate'],
@ -153,7 +159,9 @@ class CLSExecutor(BaseExecutor):
win_length=feat_conf['window_length'],
f_min=feat_conf['f_min'],
f_max=feat_conf['f_max'],
n_mels=feat_conf['n_mels'], )
n_mels=feat_conf['n_mels'],
**pad_mode_kwarg,
)
feats = feature_extractor(
paddle.to_tensor(paddle.to_tensor(waveform).unsqueeze(0)))
self._inputs['feats'] = paddle.transpose(feats, [0, 2, 1]).unsqueeze(

@ -451,12 +451,25 @@ def get_voc_inference(
voc_name = voc[:voc.rindex('_')]
voc_class = dynamic_import(voc_name, model_alias)
voc_inference_class = dynamic_import(voc_name + '_inference', model_alias)
# NPU currently supports only mode='constant'
# this code has been adapted to support 'paddlespeech.t2s.models.melgan.melgan.MelGANGenerator'
npu_pad_mode = {"mode": "constant"} if paddle.get_device().startswith('npu') else {}
if voc_name != 'wavernn':
if npu_pad_mode:
voc_config["generator_params"].setdefault("pad_params", {})
voc_config["generator_params"]["pad_params"].update(npu_pad_mode)
voc = voc_class(**voc_config["generator_params"])
voc.set_state_dict(paddle.load(voc_ckpt)["generator_params"])
voc.remove_weight_norm()
voc.eval()
else:
if npu_pad_mode:
voc_config["model"].setdefault("pad_params", {})
voc_config["model"]["pad_params"].update(npu_pad_mode)
voc = voc_class(**voc_config["model"])
voc.set_state_dict(paddle.load(voc_ckpt)["main_params"])
voc.eval()

Loading…
Cancel
Save