Merge branch 'tts-server3' of https://github.com/lym0302/PaddleSpeech into tts-server3

pull/1425/head
lym0302 3 years ago
commit 830e91ca5c

@@ -12,17 +12,18 @@ port: 8692
 # am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
 ##################################################################
 am: 'fastspeech2_csmsc'
-am_model:
-am_params:
-phones_dict: './dict_dir/phone_id_map.txt'
+am_model: # the pdmodel file of am static model
+am_params: # the pdiparams file of am static model
+sample_rate: 24000
+phones_dict:
 tones_dict:
 speaker_dict:
 spk_id: 0
 am_predictor_conf:
-    use_gpu: 'true'
-    enable_mkldnn: 'true'
-    switch_ir_optim: 'true'
+    use_gpu: True
+    enable_mkldnn: True
+    switch_ir_optim: True
 ##################################################################
@@ -30,16 +31,16 @@ am_predictor_conf:
 # voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
 ##################################################################
 voc: 'pwgan_csmsc'
-voc_model:
-voc_params:
+voc_model: # the pdmodel file of vocoder static model
+voc_params: # the pdiparams file of vocoder static model
 voc_predictor_conf:
-    use_gpu: 'true'
-    enable_mkldnn: 'true'
-    switch_ir_optim: 'true'
+    use_gpu: True
+    enable_mkldnn: True
+    switch_ir_optim: True
 ##################################################################
 # OTHERS                                                         #
 ##################################################################
 lang: 'zh'
 device: paddle.get_device()
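
The switch from quoted 'true' to bare True above is what allows the predictor setup later in this commit to drop its string comparisons: YAML parses a quoted 'true' as a string but a bare True as a boolean. A minimal sketch, assuming the server loads this config with PyYAML (the loader itself is not shown in this diff):

import yaml

# Quoted scalars stay strings; bare True/False become Python bools.
conf = yaml.safe_load("use_gpu: 'true'\nenable_mkldnn: True")
print(type(conf["use_gpu"]))        # <class 'str'>  -> needed the == "true" checks
print(type(conf["enable_mkldnn"]))  # <class 'bool'> -> plain truthiness works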

@@ -36,8 +36,6 @@ from utils.errors import ErrorCode
 from utils.exception import ServerBaseException
 from utils.paddle_predictor import init_predictor
 from utils.paddle_predictor import run_model
-#from paddle.inference import Config
-#from paddle.inference import create_predictor

 __all__ = ['TTSEngine']
@@ -48,7 +46,7 @@ pretrained_models = {
         'url':
         'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_nosil_baker_static_0.5.zip',
         'md5':
-        '9a849a74d1be0c758dd5a1b9c8f77f3d',
+        'f10cbdedf47dc7a9668d2264494e1823',
         'model':
         'speedyspeech_csmsc.pdmodel',
         'params':
@@ -57,19 +55,23 @@ pretrained_models = {
         'phone_id_map.txt',
         'tones_dict':
         'tone_id_map.txt',
+        'sample_rate':
+        24000,
     },
     # fastspeech2
     "fastspeech2_csmsc-zh": {
         'url':
         'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip',
         'md5':
-        '8eb01c2e4bc7e8b59beaa9fa046069cf',
+        '9788cd9745e14c7a5d12d32670b2a5a7',
         'model':
         'fastspeech2_csmsc.pdmodel',
         'params':
         'fastspeech2_csmsc.pdiparams',
         'phones_dict':
         'phone_id_map.txt',
+        'sample_rate':
+        24000,
     },
     # pwgan
     "pwgan_csmsc-zh": {
@@ -139,6 +141,7 @@ class TTSServerExecutor(TTSExecutor):
             am: str='fastspeech2_csmsc',
             am_model: Optional[os.PathLike]=None,
             am_params: Optional[os.PathLike]=None,
+            sample_rate: int=24000,
             phones_dict: Optional[os.PathLike]=None,
             tones_dict: Optional[os.PathLike]=None,
             speaker_dict: Optional[os.PathLike]=None,
@ -156,11 +159,7 @@ class TTSServerExecutor(TTSExecutor):
return return
# am # am
am_tag = am + '-' + lang am_tag = am + '-' + lang
if phones_dict is None: if am_model is None or am_params is None or phones_dict is None:
print("please input phones_dict!")
### 后续下载的模型里加上 phone 和 tone的 dict 就不用这个了
#if am_model is None or am_params is None or phones_dict is None:
if am_model is None or am_params is None:
am_res_path = self._get_pretrained_path(am_tag) am_res_path = self._get_pretrained_path(am_tag)
self.am_res_path = am_res_path self.am_res_path = am_res_path
self.am_model = os.path.join(am_res_path, self.am_model = os.path.join(am_res_path,
@ -168,10 +167,10 @@ class TTSServerExecutor(TTSExecutor):
self.am_params = os.path.join(am_res_path, self.am_params = os.path.join(am_res_path,
pretrained_models[am_tag]['params']) pretrained_models[am_tag]['params'])
# must have phones_dict in acoustic # must have phones_dict in acoustic
#self.phones_dict = os.path.join( self.phones_dict = os.path.join(
#am_res_path, pretrained_models[am_tag]['phones_dict']) am_res_path, pretrained_models[am_tag]['phones_dict'])
self.sample_rate = pretrained_models[am_tag]['sample_rate']
self.phones_dict = os.path.abspath(phones_dict)
logger.info(am_res_path) logger.info(am_res_path)
logger.info(self.am_model) logger.info(self.am_model)
logger.info(self.am_params) logger.info(self.am_params)
@@ -179,6 +178,7 @@ class TTSServerExecutor(TTSExecutor):
             self.am_model = os.path.abspath(am_model)
             self.am_params = os.path.abspath(am_params)
             self.phones_dict = os.path.abspath(phones_dict)
+            self.sample_rate = sample_rate
             self.am_res_path = os.path.dirname(os.path.abspath(self.am_model))
             print("self.phones_dict:", self.phones_dict)
@@ -343,6 +343,7 @@ class TTSEngine(BaseEngine):
             am=self.conf_dict["am"],
             am_model=self.conf_dict["am_model"],
             am_params=self.conf_dict["am_params"],
+            sample_rate=self.conf_dict["sample_rate"],
             phones_dict=self.conf_dict["phones_dict"],
             tones_dict=self.conf_dict["tones_dict"],
             speaker_dict=self.conf_dict["speaker_dict"],
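
Note that the engine indexes conf_dict["sample_rate"] directly, so an older config file without the new key would raise a KeyError at startup; a defensive variant (an assumption, not what this diff does) would be self.conf_dict.get("sample_rate", 24000).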
@ -450,8 +451,7 @@ class TTSEngine(BaseEngine):
try: try:
target_sample_rate, wav_base64 = self.postprocess( target_sample_rate, wav_base64 = self.postprocess(
wav=self.executor._outputs['wav'].numpy(), wav=self.executor._outputs['wav'].numpy(),
#original_fs=self.executor.am_config.fs, original_fs=self.executor.sample_rate,
original_fs=24000, # TODO get sample rate from model
target_fs=sample_rate, target_fs=sample_rate,
volume=volume, volume=volume,
speed=speed, speed=speed,
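
postprocess() now receives the model's native rate via self.executor.sample_rate instead of a hardcoded 24000. The resampler itself is not part of this diff; a minimal sketch of what the original_fs -> target_fs step could look like (naive linear interpolation, purely illustrative — a production server would more likely use a polyphase filter such as scipy.signal.resample_poly):

import numpy as np

def resample(wav: np.ndarray, original_fs: int, target_fs: int) -> np.ndarray:
    # No-op when the client asks for the model's native rate.
    if original_fs == target_fs:
        return wav
    duration = wav.shape[0] / original_fs
    n_out = int(round(duration * target_fs))
    t_src = np.linspace(0.0, duration, num=wav.shape[0], endpoint=False)
    t_out = np.linspace(0.0, duration, num=n_out, endpoint=False)
    # Linear interpolation of the waveform onto the target time grid.
    return np.interp(t_out, t_src, wav).astype(wav.dtype)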

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+from typing import List
 from typing import Optional

 from paddle.inference import Config
@ -31,21 +32,20 @@ def init_predictor(model_dir: Optional[os.PathLike]=None,
predictor_conf (dict, optional): The configuration parameters of predictor. Defaults to None. predictor_conf (dict, optional): The configuration parameters of predictor. Defaults to None.
Returns: Returns:
[type]: [description] predictor (PaddleInferPredictor): created predictor
""" """
if model_dir is not None: if model_dir is not None:
config = Config(args.model_dir) config = Config(args.model_dir)
else: else:
config = Config(model_file, params_file) config = Config(model_file, params_file)
config.enable_memory_optim() config.enable_memory_optim()
if "use_gpu" in predictor_conf and predictor_conf["use_gpu"] == "true": if predictor_conf["use_gpu"]:
config.enable_use_gpu(1000, 0) config.enable_use_gpu(1000, 0)
if "enable_mkldnn" in predictor_conf and predictor_conf[ if predictor_conf["enable_mkldnn"]:
"enable_mkldnn"] == "true":
config.enable_mkldnn() config.enable_mkldnn()
if "switch_ir_optim" in predictor_conf and predictor_conf[ if predictor_conf["switch_ir_optim"]:
"switch_ir_optim"] == "true":
config.switch_ir_optim() config.switch_ir_optim()
predictor = create_predictor(config) predictor = create_predictor(config)
@ -53,7 +53,7 @@ def init_predictor(model_dir: Optional[os.PathLike]=None,
return predictor return predictor
def run_model(predictor, input: list): def run_model(predictor, input: List) -> List:
""" run predictor """ run predictor
Args: Args:
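
One caveat the diff leaves untouched: the context line config = Config(args.model_dir) references an undefined args and would raise a NameError whenever model_dir is passed; it presumably should read Config(model_dir). A corrected sketch of the helper under the new boolean config (the full signature beyond model_dir is an assumption from the body and docstring, and the .get() defaults are added for safety rather than taken from the diff):

import os
from typing import Optional

from paddle.inference import Config
from paddle.inference import create_predictor


def init_predictor(model_dir: Optional[os.PathLike]=None,
                   model_file: Optional[os.PathLike]=None,
                   params_file: Optional[os.PathLike]=None,
                   predictor_conf: Optional[dict]=None):
    # A model directory takes precedence; otherwise use explicit files.
    if model_dir is not None:
        config = Config(model_dir)  # was: Config(args.model_dir)
    else:
        config = Config(model_file, params_file)
    config.enable_memory_optim()

    predictor_conf = predictor_conf or {}
    if predictor_conf.get("use_gpu"):
        config.enable_use_gpu(1000, 0)  # 1000 MB initial pool on GPU 0
    if predictor_conf.get("enable_mkldnn"):
        config.enable_mkldnn()
    if predictor_conf.get("switch_ir_optim"):
        config.switch_ir_optim()

    return create_predictor(config)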
