fix Voc5/Jets with CSMSC

10 months ago · f75cc25137
parent 231b78c828
commit f75cc25137
10 changed files with 32 additions and 25 deletions
--- a/examples/csmsc/tts2/README.md
+++ b/examples/csmsc/tts2/README.md
@ -5,6 +5,14 @@ This example contains code used to train a [SpeedySpeech](http://arxiv.org/abs/2
 ### Download and Extract
 Download CSMSC from its [Official Website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. The dataset will then be in the directory `~/datasets/BZNSYP`.

+The structure of the folder is listed below.
+
+```text
+datasets/BZNSYP
+└── Wave
+    └── .wav files
+```
+
 ### Get MFA Result and Extract
 We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for SPEEDYSPEECH.
 You can download the alignment results here: [baker_alignment_tone.tar.gz](https://paddlespeech.bj.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.
--- a/paddlespeech/t2s/exps/ernie_sat/preprocess.py
+++ b/paddlespeech/t2s/exps/ernie_sat/preprocess.py
@ -241,9 +241,9 @@ def main():

    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
-        # split data into 3 sections
-        num_train = 9800
-        num_dev = 100
+        # split data into 3 sections, the max number of dev/test is 10% or 100
+        num_dev = min(int(len(wav_files) * 0.1), 100)
+        num_train = len(wav_files) - num_dev * 2
        train_wav_files = wav_files[:num_train]
        dev_wav_files = wav_files[num_train:num_train + num_dev]
        test_wav_files = wav_files[num_train + num_dev:]
--- a/paddlespeech/t2s/exps/fastspeech2/gen_gta_mel.py
+++ b/paddlespeech/t2s/exps/fastspeech2/gen_gta_mel.py
@ -81,9 +81,9 @@ def evaluate(args, fastspeech2_config):

    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
-        # split data into 3 sections
-        num_train = 9800
-        num_dev = 100
+        # split data into 3 sections, the max number of dev/test is 10% or 100
+        num_dev = min(int(len(wav_files) * 0.1), 100)
+        num_train = len(wav_files) - num_dev * 2
        train_wav_files = wav_files[:num_train]
        dev_wav_files = wav_files[num_train:num_train + num_dev]
        test_wav_files = wav_files[num_train + num_dev:]
--- a/paddlespeech/t2s/exps/fastspeech2/preprocess.py
+++ b/paddlespeech/t2s/exps/fastspeech2/preprocess.py
@ -271,9 +271,9 @@ def main():

    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
-        # split data into 3 sections
-        num_train = 9800
-        num_dev = 100
+        # split data into 3 sections, the max number of dev/test is 10% or 100
+        num_dev = min(int(len(wav_files) * 0.1), 100)
+        num_train = len(wav_files) - num_dev * 2
        train_wav_files = wav_files[:num_train]
        dev_wav_files = wav_files[num_train:num_train + num_dev]
        test_wav_files = wav_files[num_train + num_dev:]
--- a/paddlespeech/t2s/exps/gan_vocoder/preprocess.py
+++ b/paddlespeech/t2s/exps/gan_vocoder/preprocess.py
@ -203,7 +203,6 @@ def main():
        sentences, speaker_set = get_phn_dur(dur_file)
        merge_silence(sentences)

-    # split data into 3 sections
    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
        # split data into 3 sections, the max number of dev/test is 10% or 100
--- a/paddlespeech/t2s/exps/jets/preprocess.py
+++ b/paddlespeech/t2s/exps/jets/preprocess.py
@ -314,9 +314,9 @@ def main():

    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
-        # split data into 3 sections
-        num_train = 9800
-        num_dev = 100
+        # split data into 3 sections, the max number of dev/test is 10% or 100
+        num_dev = min(int(len(wav_files) * 0.1), 100)
+        num_train = len(wav_files) - num_dev * 2
        train_wav_files = wav_files[:num_train]
        dev_wav_files = wav_files[num_train:num_train + num_dev]
        test_wav_files = wav_files[num_train + num_dev:]
--- a/paddlespeech/t2s/exps/speedyspeech/gen_gta_mel.py
+++ b/paddlespeech/t2s/exps/speedyspeech/gen_gta_mel.py
@ -90,9 +90,9 @@ def evaluate(args, speedyspeech_config):

    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
-        # split data into 3 sections
-        num_train = 9800
-        num_dev = 100
+        # split data into 3 sections, the max number of dev/test is 10% or 100
+        num_dev = min(int(len(wav_files) * 0.1), 100)
+        num_train = len(wav_files) - num_dev * 2
        train_wav_files = wav_files[:num_train]
        dev_wav_files = wav_files[num_train:num_train + num_dev]
        test_wav_files = wav_files[num_train + num_dev:]
--- a/paddlespeech/t2s/exps/speedyspeech/preprocess.py
+++ b/paddlespeech/t2s/exps/speedyspeech/preprocess.py
@ -237,9 +237,9 @@ def main():

    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
-        # split data into 3 sections
-        num_train = 9800
-        num_dev = 100
+        # split data into 3 sections, the max number of dev/test is 10% or 100
+        num_dev = min(int(len(wav_files) * 0.1), 100)
+        num_train = len(wav_files) - num_dev * 2
        train_wav_files = wav_files[:num_train]
        dev_wav_files = wav_files[num_train:num_train + num_dev]
        test_wav_files = wav_files[num_train + num_dev:]
--- a/paddlespeech/t2s/exps/tacotron2/preprocess.py
+++ b/paddlespeech/t2s/exps/tacotron2/preprocess.py
@ -228,9 +228,9 @@ def main():

    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
-        # split data into 3 sections
-        num_train = 9800
-        num_dev = 100
+        # split data into 3 sections, the max number of dev/test is 10% or 100
+        num_dev = min(int(len(wav_files) * 0.1), 100)
+        num_train = len(wav_files) - num_dev * 2
        train_wav_files = wav_files[:num_train]
        dev_wav_files = wav_files[num_train:num_train + num_dev]
        test_wav_files = wav_files[num_train + num_dev:]
--- a/paddlespeech/t2s/exps/vits/preprocess.py
+++ b/paddlespeech/t2s/exps/vits/preprocess.py
@ -241,9 +241,9 @@ def main():

    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
-        # split data into 3 sections
-        num_train = 9800
-        num_dev = 100
+        # split data into 3 sections, the max number of dev/test is 10% or 100
+        num_dev = min(int(len(wav_files) * 0.1), 100)
+        num_train = len(wav_files) - num_dev * 2
        train_wav_files = wav_files[:num_train]
        dev_wav_files = wav_files[num_train:num_train + num_dev]
        test_wav_files = wav_files[num_train + num_dev:]