diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py index 6334211a..60fa9eb8 100644 --- a/paddlespeech/cli/tts/infer.py +++ b/paddlespeech/cli/tts/infer.py @@ -82,6 +82,7 @@ class TTSExecutor(BaseExecutor): 'tacotron2_csmsc', 'tacotron2_ljspeech', 'fastspeech2_male', + 'fastspeech2_canton', ], help='Choose acoustic model type of tts task.') self.parser.add_argument( @@ -273,7 +274,7 @@ class TTSExecutor(BaseExecutor): use_pretrained_voc = False voc_lang = lang # When speaker is 174 (csmsc), use csmsc's vocoder is better than aishell3's - if lang == 'mix': + if lang == 'mix' or lang == 'canton': voc_dataset = voc[voc.rindex('_') + 1:] if voc_dataset in {"ljspeech", "vctk"}: voc_lang = 'en' @@ -487,7 +488,7 @@ class TTSExecutor(BaseExecutor): # fastspeech2 else: # multi speaker - if am_dataset in {'aishell3', 'vctk', 'mix'}: + if am_dataset in {'aishell3', 'vctk', 'mix', 'canton'}: mel = self.am_inference( part_phone_ids, spk_id=paddle.to_tensor(spk_id)) else: diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py index 82c7776e..96010610 100644 --- a/paddlespeech/resource/pretrained_models.py +++ b/paddlespeech/resource/pretrained_models.py @@ -889,6 +889,24 @@ tts_dynamic_pretrained_models = { 'phone_id_map.txt', }, }, + "fastspeech2_canton-canton": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_canton_ckpt_1.4.0.zip', + 'md5': + '504560c082deba82120927627c900374', + 'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_140000.pdz', + 'speech_stats': + 'speech_stats.npy', + 'phones_dict': + 'phone_id_map.txt', + 'speaker_dict': + 'speaker_id_map.txt', + }, + }, "fastspeech2_ljspeech-en": { '1.0': { 'url': diff --git a/tests/unit/cli/test_cli.sh b/tests/unit/cli/test_cli.sh index 6b525268..a7f7d11e 100755 --- a/tests/unit/cli/test_cli.sh +++ b/tests/unit/cli/test_cli.sh @@ -61,6 +61,7 @@ paddlespeech tts --am tacotron2_csmsc --voc wavernn_csmsc --input "你好,欢 paddlespeech tts --am tacotron2_ljspeech --voc pwgan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang zh --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." +paddlespeech tts --am fastspeech2_canton --voc pwgan_aishell3 --input "各个国家有各个国家嘅国歌" --lang canton --spk_id 10 # mix tts # The `am` must be `fastspeech2_mix`!