Merge pull request #2249 from yt605155624/fix_mix_cli

[tts][cli]update mix tts
3 years ago · ac385053ba
parent ed18b08d07 5d515f3f3f
commit ac385053ba
3 changed files with 25 additions and 5 deletions
--- a/paddlespeech/cli/tts/infer.py
+++ b/paddlespeech/cli/tts/infer.py
@@ -255,9 +255,9 @@ class TTSExecutor(BaseExecutor):
        else:
            use_pretrained_voc = False
        voc_lang = lang
-        # we must use ljspeech's voc for mix am now!
+        # When the speaker is 174 (csmsc), using csmsc's vocoder is better than using aishell3's
        if lang == 'mix':
-            voc_lang = 'en'
+            voc_lang = 'zh'
        voc_tag = voc + '-' + voc_lang
        self.task_resource.set_task_model(
            model_tag=voc_tag,
--- a/paddlespeech/resource/pretrained_models.py
+++ b/paddlespeech/resource/pretrained_models.py
@@ -672,6 +672,22 @@ tts_dynamic_pretrained_models = {
            'speaker_dict':
            'speaker_id_map.txt',
        },
        '2.0': {
            'url':
            'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_0.2.0.zip',
            'md5':
            '1d938e104e972386c8bfcbcc98a91587',
            'config':
            'default.yaml',
            'ckpt':
            'snapshot_iter_99200.pdz',
            'speech_stats':
            'speech_stats.npy',
            'phones_dict':
            'phone_id_map.txt',
            'speaker_dict':
            'speaker_id_map.txt',
        },
    },
    # tacotron2
    "tacotron2_csmsc-zh": {
--- a/tests/unit/cli/test_cli.sh
+++ b/tests/unit/cli/test_cli.sh
@@ -56,9 +56,13 @@ paddlespeech tts --am tacotron2_ljspeech --voc pwgan_ljspeech --lang en --input
 # mix tts
 # The `am` must be `fastspeech2_mix`!
 # The `lang` must be `mix`!
-# The voc must be `hifigan_ljspeech` or `pwgan_ljspeech` for f`astspeech2_mix` now!
+# The voc must be a Chinese dataset's voc for now!
-paddlespeech tts --am fastspeech2_mix --voc hifigan_ljspeech --lang mix  --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 0  --output mix_spk0.wav
+# spk 174 is csmsc, spk 175 is ljspeech
-paddlespeech tts --am fastspeech2_mix --voc pwgan_ljspeech --lang mix  --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 1  --output mix_spk1.wav
+paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 174 --output mix_spk174.wav
 paddlespeech tts --am fastspeech2_mix --voc hifigan_aishell3 --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 174 --output mix_spk174_aishell3.wav
 paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175_pwgan.wav
 paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175.wav
 # Speech Translation (only support linux)
 paddlespeech st --input ./en.wav