update vctk voc1, test=tts (#1294)

pull/1301/head
TianYuan 3 years ago committed by GitHub
parent 9c1e098693
commit fb238d83f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -95,16 +95,16 @@ optional arguments:
### Synthesizing ### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc1) as the neural vocoder. We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc1) as the neural vocoder.
Download pretrained parallel wavegan model from [pwg_vctk_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_vctk_ckpt_0.5.zip)and unzip it. Download pretrained parallel wavegan model from [pwg_vctk_ckpt_0.1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_vctk_ckpt_0.1.1.zip) and unzip it.
```bash ```bash
unzip pwg_vctk_ckpt_0.5.zip unzip pwg_vctk_ckpt_0.1.1.zip
``` ```
Parallel WaveGAN checkpoint contains files listed below. Parallel WaveGAN checkpoint contains files listed below.
```text ```text
pwg_vctk_ckpt_0.5 pwg_vctk_ckpt_0.1.1
├── pwg_default.yaml # default config used to train parallel wavegan ├── default.yaml # default config used to train parallel wavegan
├── pwg_snapshot_iter_1000000.pdz # generator parameters of parallel wavegan ├── snapshot_iter_1500000.pdz # generator parameters of parallel wavegan
└── pwg_stats.npy # statistics used to normalize spectrogram when training parallel wavegan └── feats_stats.npy # statistics used to normalize spectrogram when training parallel wavegan
``` ```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`. `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash ```bash

@ -12,9 +12,9 @@ python3 ${BIN_DIR}/../synthesize.py \
--am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
--am_stat=dump/train/speech_stats.npy \ --am_stat=dump/train/speech_stats.npy \
--voc=pwgan_vctk \ --voc=pwgan_vctk \
--voc_config=pwg_vctk_ckpt_0.5/pwg_default.yaml \ --voc_config=pwg_vctk_ckpt_0.1.1/default.yaml \
--voc_ckpt=pwg_vctk_ckpt_0.5/pwg_snapshot_iter_1000000.pdz \ --voc_ckpt=pwg_vctk_ckpt_0.1.1/snapshot_iter_1500000.pdz \
--voc_stat=pwg_vctk_ckpt_0.5/pwg_stats.npy \ --voc_stat=pwg_vctk_ckpt_0.1.1/feats_stats.npy \
--test_metadata=dump/test/norm/metadata.jsonl \ --test_metadata=dump/test/norm/metadata.jsonl \
--output_dir=${train_output_path}/test \ --output_dir=${train_output_path}/test \
--phones_dict=dump/phone_id_map.txt \ --phones_dict=dump/phone_id_map.txt \

@ -12,9 +12,9 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \
--am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
--am_stat=dump/train/speech_stats.npy \ --am_stat=dump/train/speech_stats.npy \
--voc=pwgan_vctk \ --voc=pwgan_vctk \
--voc_config=pwg_vctk_ckpt_0.5/pwg_default.yaml \ --voc_config=pwg_vctk_ckpt_0.1.1/default.yaml \
--voc_ckpt=pwg_vctk_ckpt_0.5/pwg_snapshot_iter_1000000.pdz \ --voc_ckpt=pwg_vctk_ckpt_0.1.1/snapshot_iter_1500000.pdz \
--voc_stat=pwg_vctk_ckpt_0.5/pwg_stats.npy \ --voc_stat=pwg_vctk_ckpt_0.1.1/feats_stats.npy \
--lang=en \ --lang=en \
--text=${BIN_DIR}/../sentences_en.txt \ --text=${BIN_DIR}/../sentences_en.txt \
--output_dir=${train_output_path}/test_e2e \ --output_dir=${train_output_path}/test_e2e \

@ -132,15 +132,15 @@ optional arguments:
5. `--ngpu` is the number of GPUs to use; if ngpu == 0, the CPU is used. 5. `--ngpu` is the number of GPUs to use; if ngpu == 0, the CPU is used.
## Pretrained Model ## Pretrained Model
Pretrained models can be downloaded here [pwg_vctk_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_vctk_ckpt_0.5.zip). Pretrained models can be downloaded here [pwg_vctk_ckpt_0.1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_vctk_ckpt_0.1.1.zip).
Parallel WaveGAN checkpoint contains files listed below. Parallel WaveGAN checkpoint contains files listed below.
```text ```text
pwg_vctk_ckpt_0.5 pwg_vctk_ckpt_0.1.1
├── pwg_default.yaml # default config used to train parallel wavegan ├── default.yaml # default config used to train parallel wavegan
├── pwg_snapshot_iter_1000000.pdz # generator parameters of parallel wavegan ├── snapshot_iter_1500000.pdz # generator parameters of parallel wavegan
└── pwg_stats.npy # statistics used to normalize spectrogram when training parallel wavegan └── feats_stats.npy # statistics used to normalize spectrogram when training parallel wavegan
``` ```
## Acknowledgement ## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN. We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.

@ -70,7 +70,7 @@ lambda_adv: 4.0 # Loss balancing coefficient.
########################################################### ###########################################################
# DATA LOADER SETTING # # DATA LOADER SETTING #
########################################################### ###########################################################
batch_size: 8 # Batch size. batch_size: 6 # Batch size.
batch_max_steps: 24000 # Length of each audio in batch. Make sure it is divisible by n_shift. batch_max_steps: 24000 # Length of each audio in batch. Make sure it is divisible by n_shift.
num_workers: 2 # Number of workers in DataLoader. num_workers: 2 # Number of workers in DataLoader.
@ -100,7 +100,7 @@ discriminator_grad_norm: 1 # Discriminator's gradient norm.
# INTERVAL SETTING # # INTERVAL SETTING #
########################################################### ###########################################################
discriminator_train_start_steps: 100000 # Number of steps to start to train discriminator. discriminator_train_start_steps: 100000 # Number of steps to start to train discriminator.
train_max_steps: 1000000 # Number of training steps. train_max_steps: 1500000 # Number of training steps.
save_interval_steps: 5000 # Interval steps to save checkpoint. save_interval_steps: 5000 # Interval steps to save checkpoint.
eval_interval_steps: 1000 # Interval steps to evaluate the network. eval_interval_steps: 1000 # Interval steps to evaluate the network.

@ -156,15 +156,15 @@ pretrained_models = {
}, },
"pwgan_vctk-en": { "pwgan_vctk-en": {
'url': 'url':
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_vctk_ckpt_0.5.zip', 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_vctk_ckpt_0.1.1.zip',
'md5': 'md5':
'322ca688aec9b127cec2788b65aa3d52', 'b3da1defcde3e578be71eb284cb89f2c',
'config': 'config':
'pwg_default.yaml', 'default.yaml',
'ckpt': 'ckpt':
'pwg_snapshot_iter_1000000.pdz', 'snapshot_iter_1500000.pdz',
'speech_stats': 'speech_stats':
'pwg_stats.npy', 'feats_stats.npy',
}, },
# mb_melgan # mb_melgan
"mb_melgan_csmsc-zh": { "mb_melgan_csmsc-zh": {

Loading…
Cancel
Save