update mix tts

2 years ago · 5d515f3f3f
parent ed18b08d07
commit 5d515f3f3f
3 changed files with 25 additions and 5 deletions
--- a/paddlespeech/cli/tts/infer.py
+++ b/paddlespeech/cli/tts/infer.py
@ -255,9 +255,9 @@ class TTSExecutor(BaseExecutor):
        else:
            use_pretrained_voc = False
        voc_lang = lang
-        # we must use ljspeech's voc for mix am now!
+        # When the speaker is 174 (csmsc), csmsc's vocoder gives better results than aishell3's
        if lang == 'mix':
-            voc_lang = 'en'
+            voc_lang = 'zh'
        voc_tag = voc + '-' + voc_lang
        self.task_resource.set_task_model(
            model_tag=voc_tag,
--- a/paddlespeech/resource/pretrained_models.py
+++ b/paddlespeech/resource/pretrained_models.py
@ -672,6 +672,22 @@ tts_dynamic_pretrained_models = {
            'speaker_dict':
            'speaker_id_map.txt',
        },
+        '2.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_0.2.0.zip',
+            'md5':
+            '1d938e104e972386c8bfcbcc98a91587',
+            'config':
+            'default.yaml',
+            'ckpt':
+            'snapshot_iter_99200.pdz',
+            'speech_stats':
+            'speech_stats.npy',
+            'phones_dict':
+            'phone_id_map.txt',
+            'speaker_dict':
+            'speaker_id_map.txt',
+        },
    },
    # tacotron2
    "tacotron2_csmsc-zh": {
--- a/tests/unit/cli/test_cli.sh
+++ b/tests/unit/cli/test_cli.sh
@ -56,9 +56,13 @@ paddlespeech tts --am tacotron2_ljspeech --voc pwgan_ljspeech --lang en --input
 # mix tts
 # The `am` must be `fastspeech2_mix`!
 # The `lang` must be `mix`!
-# The voc must be `hifigan_ljspeech` or `pwgan_ljspeech` for f`astspeech2_mix` now!
-paddlespeech tts --am fastspeech2_mix --voc hifigan_ljspeech --lang mix  --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 0  --output mix_spk0.wav
-paddlespeech tts --am fastspeech2_mix --voc pwgan_ljspeech --lang mix  --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 1  --output mix_spk1.wav
+# The voc must be a Chinese dataset's voc now!
+# spk 174 is csmsc, spk 175 is ljspeech
+paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 174 --output mix_spk174.wav
+paddlespeech tts --am fastspeech2_mix --voc hifigan_aishell3 --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 174 --output mix_spk174_aishell3.wav
+paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175_pwgan.wav
+paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175.wav
+

 # Speech Translation (only support linux)
 paddlespeech st --input ./en.wav