diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py index 1399cb84b..223d1e7af 100644 --- a/paddlespeech/t2s/frontend/zh_frontend.py +++ b/paddlespeech/t2s/frontend/zh_frontend.py @@ -365,15 +365,15 @@ class Frontend(): print("----------------------------") return phonemes - def get_input_ids( - self, - sentence: str, - merge_sentences: bool=True, - get_tone_ids: bool=False, - robot: bool=False, - print_info: bool=False, - add_blank: bool=False, - blank_token: str="") -> Dict[str, List[paddle.Tensor]]: + def get_input_ids(self, + sentence: str, + merge_sentences: bool=True, + get_tone_ids: bool=False, + robot: bool=False, + print_info: bool=False, + add_blank: bool=False, + blank_token: str="", + to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]: phonemes = self.get_phonemes( sentence, merge_sentences=merge_sentences, @@ -384,20 +384,22 @@ class Frontend(): tones = [] temp_phone_ids = [] temp_tone_ids = [] + for part_phonemes in phonemes: phones, tones = self._get_phone_tone( part_phonemes, get_tone_ids=get_tone_ids) - if add_blank: phones = insert_after_character(phones, blank_token) - if tones: tone_ids = self._t2id(tones) - tone_ids = paddle.to_tensor(tone_ids) + if to_tensor: + tone_ids = paddle.to_tensor(tone_ids) temp_tone_ids.append(tone_ids) if phones: phone_ids = self._p2id(phones) - phone_ids = paddle.to_tensor(phone_ids) + # if use paddle.to_tensor() in onnxruntime, the first time will be too low + if to_tensor: + phone_ids = paddle.to_tensor(phone_ids) temp_phone_ids.append(phone_ids) if temp_tone_ids: result["tone_ids"] = temp_tone_ids