Merge pull request #2203 from yt605155624/mix_cli

[TTS]add mix tts cli
pull/2214/head
TianYuan 2 years ago committed by GitHub
commit 80c219b774
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -29,8 +29,7 @@ from yacs.config import CfgNode
from ..executor import BaseExecutor
from ..log import logger
from ..utils import stats_wrapper
from paddlespeech.t2s.frontend import English
from paddlespeech.t2s.frontend.zh_frontend import Frontend
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.modules.normalizer import ZScore
__all__ = ['TTSExecutor']
@ -54,6 +53,7 @@ class TTSExecutor(BaseExecutor):
'fastspeech2_ljspeech',
'fastspeech2_aishell3',
'fastspeech2_vctk',
'fastspeech2_mix',
'tacotron2_csmsc',
'tacotron2_ljspeech',
],
@ -98,7 +98,7 @@ class TTSExecutor(BaseExecutor):
self.parser.add_argument(
'--voc',
type=str,
default='pwgan_csmsc',
default='hifigan_csmsc',
choices=[
'pwgan_csmsc',
'pwgan_ljspeech',
@ -135,7 +135,7 @@ class TTSExecutor(BaseExecutor):
'--lang',
type=str,
default='zh',
help='Choose model language. zh or en')
help='Choose model language. zh or en or mix')
self.parser.add_argument(
'--device',
type=str,
@ -231,8 +231,11 @@ class TTSExecutor(BaseExecutor):
use_pretrained_voc = True
else:
use_pretrained_voc = False
voc_tag = voc + '-' + lang
voc_lang = lang
# The mix acoustic model currently only works with an ljspeech (English) vocoder, so look up the vocoder under 'en' when lang is 'mix'.
if lang == 'mix':
voc_lang = 'en'
voc_tag = voc + '-' + voc_lang
self.task_resource.set_task_model(
model_tag=voc_tag,
model_type=1, # vocoder
@ -281,13 +284,8 @@ class TTSExecutor(BaseExecutor):
spk_num = len(spk_id)
# frontend
if lang == 'zh':
self.frontend = Frontend(
phone_vocab_path=self.phones_dict,
tone_vocab_path=self.tones_dict)
elif lang == 'en':
self.frontend = English(phone_vocab_path=self.phones_dict)
self.frontend = get_frontend(
lang=lang, phones_dict=self.phones_dict, tones_dict=self.tones_dict)
# acoustic model
odim = self.am_config.n_mels
@ -381,8 +379,12 @@ class TTSExecutor(BaseExecutor):
input_ids = self.frontend.get_input_ids(
text, merge_sentences=merge_sentences)
phone_ids = input_ids["phone_ids"]
elif lang == 'mix':
input_ids = self.frontend.get_input_ids(
text, merge_sentences=merge_sentences)
phone_ids = input_ids["phone_ids"]
else:
logger.error("lang should in {'zh', 'en'}!")
logger.error("lang should in {'zh', 'en', 'mix'}!")
self.frontend_time = time.time() - frontend_st
self.am_time = 0
@ -398,7 +400,7 @@ class TTSExecutor(BaseExecutor):
# fastspeech2
else:
# multi speaker
if am_dataset in {"aishell3", "vctk"}:
if am_dataset in {'aishell3', 'vctk', 'mix'}:
mel = self.am_inference(
part_phone_ids, spk_id=paddle.to_tensor(spk_id))
else:

@ -655,6 +655,24 @@ tts_dynamic_pretrained_models = {
'phone_id_map.txt',
},
},
"fastspeech2_mix-mix": {
'1.0': {
'url':
'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen.zip',
'md5':
'77d9d4b5a79ed6203339ead7ef6c74f9',
'config':
'default.yaml',
'ckpt':
'snapshot_iter_94000.pdz',
'speech_stats':
'speech_stats.npy',
'phones_dict':
'phone_id_map.txt',
'speaker_dict':
'speaker_id_map.txt',
},
},
# tacotron2
"tacotron2_csmsc-zh": {
'1.0': {

@ -43,7 +43,7 @@ paddlespeech tts --input "你好,欢迎使用百度飞桨深度学习框架!
paddlespeech tts --am speedyspeech_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!"
paddlespeech tts --voc mb_melgan_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!"
paddlespeech tts --voc style_melgan_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!"
paddlespeech tts --voc hifigan_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!"
paddlespeech tts --voc pwgan_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!"
paddlespeech tts --am fastspeech2_aishell3 --voc pwgan_aishell3 --input "你好,欢迎使用百度飞桨深度学习框架!" --spk_id 0
paddlespeech tts --am fastspeech2_aishell3 --voc hifigan_aishell3 --input "你好,欢迎使用百度飞桨深度学习框架!" --spk_id 0
paddlespeech tts --am fastspeech2_ljspeech --voc pwgan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get."
@ -53,6 +53,12 @@ paddlespeech tts --am fastspeech2_vctk --voc hifigan_vctk --input "Life was like
paddlespeech tts --am tacotron2_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!"
paddlespeech tts --am tacotron2_csmsc --voc wavernn_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!"
paddlespeech tts --am tacotron2_ljspeech --voc pwgan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get."
# mix tts
# The `am` must be `fastspeech2_mix`!
# The `lang` must be `mix`!
# The `voc` must be `hifigan_ljspeech` or `pwgan_ljspeech` for `fastspeech2_mix` now!
paddlespeech tts --am fastspeech2_mix --voc hifigan_ljspeech --lang mix --input "热烈欢迎您在 Discussions 中提交问题,并在 Issues 中指出发现的 bug。此外我们非常希望您参与到 Paddle Speech 的开发中!" --spk_id 0 --output mix_spk0.wav
paddlespeech tts --am fastspeech2_mix --voc pwgan_ljspeech --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 1 --output mix_spk1.wav
# Speech Translation (only support linux)
paddlespeech st --input ./en.wav

Loading…
Cancel
Save