diff --git a/speechserving/speechserving/conf/tts/tts_pd.yaml b/speechserving/speechserving/conf/tts/tts_pd.yaml index 7a1ca5dc..e34cb5ab 100644 --- a/speechserving/speechserving/conf/tts/tts_pd.yaml +++ b/speechserving/speechserving/conf/tts/tts_pd.yaml @@ -12,17 +12,18 @@ port: 8692 # am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] ################################################################## am: 'fastspeech2_csmsc' -am_model: -am_params: -phones_dict: './dict_dir/phone_id_map.txt' +am_model: # the pdmodel file of am static model +am_params: # the pdiparams file of am static model +sample_rate: 24000 +phones_dict: tones_dict: speaker_dict: spk_id: 0 am_predictor_conf: - use_gpu: 'true' - enable_mkldnn: 'true' - switch_ir_optim: 'true' + use_gpu: True + enable_mkldnn: True + switch_ir_optim: True ################################################################## @@ -30,16 +31,16 @@ am_predictor_conf: # voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] ################################################################## voc: 'pwgan_csmsc' -voc_model: -voc_params: +voc_model: # the pdmodel file of vocoder static model +voc_params: # the pdiparams file of vocoder static model voc_predictor_conf: - use_gpu: 'true' - enable_mkldnn: 'true' - switch_ir_optim: 'true' + use_gpu: True + enable_mkldnn: True + switch_ir_optim: True ################################################################## # OTHERS # ################################################################## lang: 'zh' -device: paddle.get_device() \ No newline at end of file +device: paddle.get_device() diff --git a/speechserving/speechserving/engine/tts/paddleinference/tts_engine.py b/speechserving/speechserving/engine/tts/paddleinference/tts_engine.py index b394ceaa..40bbcbf4 100644 --- a/speechserving/speechserving/engine/tts/paddleinference/tts_engine.py +++ b/speechserving/speechserving/engine/tts/paddleinference/tts_engine.py @@ -36,8 +36,6 @@ from utils.errors import ErrorCode from utils.exception import ServerBaseException from utils.paddle_predictor import init_predictor from utils.paddle_predictor import run_model -#from paddle.inference import Config -#from paddle.inference import create_predictor __all__ = ['TTSEngine'] @@ -48,7 +46,7 @@ pretrained_models = { 'url': 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_nosil_baker_static_0.5.zip', 'md5': - '9a849a74d1be0c758dd5a1b9c8f77f3d', + 'f10cbdedf47dc7a9668d2264494e1823', 'model': 'speedyspeech_csmsc.pdmodel', 'params': @@ -57,19 +55,23 @@ pretrained_models = { 'phone_id_map.txt', 'tones_dict': 'tone_id_map.txt', + 'sample_rate': + 24000, }, # fastspeech2 "fastspeech2_csmsc-zh": { 'url': 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip', 'md5': - '8eb01c2e4bc7e8b59beaa9fa046069cf', + '9788cd9745e14c7a5d12d32670b2a5a7', 'model': 'fastspeech2_csmsc.pdmodel', 'params': 'fastspeech2_csmsc.pdiparams', 'phones_dict': 'phone_id_map.txt', + 'sample_rate': + 24000, }, # pwgan "pwgan_csmsc-zh": { @@ -139,6 +141,7 @@ class TTSServerExecutor(TTSExecutor): am: str='fastspeech2_csmsc', am_model: Optional[os.PathLike]=None, am_params: Optional[os.PathLike]=None, + sample_rate: int=24000, phones_dict: Optional[os.PathLike]=None, tones_dict: Optional[os.PathLike]=None, speaker_dict: Optional[os.PathLike]=None, @@ -156,11 +159,7 @@ class TTSServerExecutor(TTSExecutor): return # am am_tag = am + '-' + lang - if phones_dict is None: - print("please input phones_dict!") - ### 后续下载的模型里加上 phone 和 tone的 dict 就不用这个了 - #if am_model is None or am_params is None or phones_dict is None: - if am_model is None or am_params is None: + if am_model is None or am_params is None or phones_dict is None: am_res_path = self._get_pretrained_path(am_tag) self.am_res_path = am_res_path self.am_model = os.path.join(am_res_path, @@ -168,10 +167,10 @@ class TTSServerExecutor(TTSExecutor): self.am_params = os.path.join(am_res_path, pretrained_models[am_tag]['params']) # must have phones_dict in acoustic - #self.phones_dict = os.path.join( - #am_res_path, pretrained_models[am_tag]['phones_dict']) + self.phones_dict = os.path.join( + am_res_path, pretrained_models[am_tag]['phones_dict']) + self.sample_rate = pretrained_models[am_tag]['sample_rate'] - self.phones_dict = os.path.abspath(phones_dict) logger.info(am_res_path) logger.info(self.am_model) logger.info(self.am_params) @@ -179,6 +178,7 @@ class TTSServerExecutor(TTSExecutor): self.am_model = os.path.abspath(am_model) self.am_params = os.path.abspath(am_params) self.phones_dict = os.path.abspath(phones_dict) + self.sample_rate = sample_rate self.am_res_path = os.path.dirname(os.path.abspath(self.am_model)) print("self.phones_dict:", self.phones_dict) @@ -343,6 +343,7 @@ class TTSEngine(BaseEngine): am=self.conf_dict["am"], am_model=self.conf_dict["am_model"], am_params=self.conf_dict["am_params"], + sample_rate=self.conf_dict["sample_rate"], phones_dict=self.conf_dict["phones_dict"], tones_dict=self.conf_dict["tones_dict"], speaker_dict=self.conf_dict["speaker_dict"], @@ -450,8 +451,7 @@ class TTSEngine(BaseEngine): try: target_sample_rate, wav_base64 = self.postprocess( wav=self.executor._outputs['wav'].numpy(), - #original_fs=self.executor.am_config.fs, - original_fs=24000, # TODO get sample rate from model + original_fs=self.executor.sample_rate, target_fs=sample_rate, volume=volume, speed=speed, diff --git a/speechserving/speechserving/utils/paddle_predictor.py b/speechserving/speechserving/utils/paddle_predictor.py index 608b2b4c..f910161b 100644 --- a/speechserving/speechserving/utils/paddle_predictor.py +++ b/speechserving/speechserving/utils/paddle_predictor.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os +from typing import List from typing import Optional from paddle.inference import Config @@ -31,21 +32,20 @@ def init_predictor(model_dir: Optional[os.PathLike]=None, predictor_conf (dict, optional): The configuration parameters of predictor. Defaults to None. Returns: - [type]: [description] + predictor (PaddleInferPredictor): created predictor """ + if model_dir is not None: config = Config(args.model_dir) else: config = Config(model_file, params_file) config.enable_memory_optim() - if "use_gpu" in predictor_conf and predictor_conf["use_gpu"] == "true": + if predictor_conf["use_gpu"]: config.enable_use_gpu(1000, 0) - if "enable_mkldnn" in predictor_conf and predictor_conf[ - "enable_mkldnn"] == "true": + if predictor_conf["enable_mkldnn"]: config.enable_mkldnn() - if "switch_ir_optim" in predictor_conf and predictor_conf[ - "switch_ir_optim"] == "true": + if predictor_conf["switch_ir_optim"]: config.switch_ir_optim() predictor = create_predictor(config) @@ -53,7 +53,7 @@ def init_predictor(model_dir: Optional[os.PathLike]=None, return predictor -def run_model(predictor, input: list): +def run_model(predictor, input: List) -> List: """ run predictor Args: