fix Voc5/Jets with CSMSC

pull/3906/head
liyulingyue 10 months ago
parent 231b78c828
commit f75cc25137

@ -5,6 +5,14 @@ This example contains code used to train a [SpeedySpeech](http://arxiv.org/abs/2
### Download and Extract
Download CSMSC from its [Official Website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. The dataset will then be located in the directory `~/datasets/BZNSYP`.
The structure of the folder is listed below.
```text
datasets/BZNSYP
└── Wave
    └── .wav files
```
### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for SpeedySpeech.
You can download the alignment results from here: [baker_alignment_tone.tar.gz](https://paddlespeech.bj.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

@ -241,9 +241,9 @@ def main():
if args.dataset == "baker":
wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
# split data into 3 sections
num_train = 9800
num_dev = 100
# split data into 3 sections, the max number of dev/test is 10% or 100
num_dev = min(int(len(wav_files) * 0.1), 100)
num_train = len(wav_files) - num_dev * 2
train_wav_files = wav_files[:num_train]
dev_wav_files = wav_files[num_train:num_train + num_dev]
test_wav_files = wav_files[num_train + num_dev:]

@ -81,9 +81,9 @@ def evaluate(args, fastspeech2_config):
if args.dataset == "baker":
wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
# split data into 3 sections
num_train = 9800
num_dev = 100
# split data into 3 sections, the max number of dev/test is 10% or 100
num_dev = min(int(len(wav_files) * 0.1), 100)
num_train = len(wav_files) - num_dev * 2
train_wav_files = wav_files[:num_train]
dev_wav_files = wav_files[num_train:num_train + num_dev]
test_wav_files = wav_files[num_train + num_dev:]

@ -271,9 +271,9 @@ def main():
if args.dataset == "baker":
wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
# split data into 3 sections
num_train = 9800
num_dev = 100
# split data into 3 sections, the max number of dev/test is 10% or 100
num_dev = min(int(len(wav_files) * 0.1), 100)
num_train = len(wav_files) - num_dev * 2
train_wav_files = wav_files[:num_train]
dev_wav_files = wav_files[num_train:num_train + num_dev]
test_wav_files = wav_files[num_train + num_dev:]

@ -203,7 +203,6 @@ def main():
sentences, speaker_set = get_phn_dur(dur_file)
merge_silence(sentences)
# split data into 3 sections
if args.dataset == "baker":
wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
# split data into 3 sections, the max number of dev/test is 10% or 100

@ -314,9 +314,9 @@ def main():
if args.dataset == "baker":
wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
# split data into 3 sections
num_train = 9800
num_dev = 100
# split data into 3 sections, the max number of dev/test is 10% or 100
num_dev = min(int(len(wav_files) * 0.1), 100)
num_train = len(wav_files) - num_dev * 2
train_wav_files = wav_files[:num_train]
dev_wav_files = wav_files[num_train:num_train + num_dev]
test_wav_files = wav_files[num_train + num_dev:]

@ -90,9 +90,9 @@ def evaluate(args, speedyspeech_config):
if args.dataset == "baker":
wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
# split data into 3 sections
num_train = 9800
num_dev = 100
# split data into 3 sections, the max number of dev/test is 10% or 100
num_dev = min(int(len(wav_files) * 0.1), 100)
num_train = len(wav_files) - num_dev * 2
train_wav_files = wav_files[:num_train]
dev_wav_files = wav_files[num_train:num_train + num_dev]
test_wav_files = wav_files[num_train + num_dev:]

@ -237,9 +237,9 @@ def main():
if args.dataset == "baker":
wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
# split data into 3 sections
num_train = 9800
num_dev = 100
# split data into 3 sections, the max number of dev/test is 10% or 100
num_dev = min(int(len(wav_files) * 0.1), 100)
num_train = len(wav_files) - num_dev * 2
train_wav_files = wav_files[:num_train]
dev_wav_files = wav_files[num_train:num_train + num_dev]
test_wav_files = wav_files[num_train + num_dev:]

@ -228,9 +228,9 @@ def main():
if args.dataset == "baker":
wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
# split data into 3 sections
num_train = 9800
num_dev = 100
# split data into 3 sections, the max number of dev/test is 10% or 100
num_dev = min(int(len(wav_files) * 0.1), 100)
num_train = len(wav_files) - num_dev * 2
train_wav_files = wav_files[:num_train]
dev_wav_files = wav_files[num_train:num_train + num_dev]
test_wav_files = wav_files[num_train + num_dev:]

@ -241,9 +241,9 @@ def main():
if args.dataset == "baker":
wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
# split data into 3 sections
num_train = 9800
num_dev = 100
# split data into 3 sections, the max number of dev/test is 10% or 100
num_dev = min(int(len(wav_files) * 0.1), 100)
num_train = len(wav_files) - num_dev * 2
train_wav_files = wav_files[:num_train]
dev_wav_files = wav_files[num_train:num_train + num_dev]
test_wav_files = wav_files[num_train + num_dev:]

Loading…
Cancel
Save