pull/4068/head
zxcd 4 months ago
parent 7883aa6cbd
commit dd45a0c681

@@ -24,12 +24,12 @@ fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips} || exit -1
 fi
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
-    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
+    avg.sh best exp/${ckpt}/checkpoints ${avg_num} || exit -1
 fi
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
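The `|| exit -1` added to each command makes the pipeline fail fast: if a stage exits with a non-zero status, the script stops instead of running later stages against missing outputs. Below is a minimal, self-contained sketch of the same stage-gating plus fail-fast idiom (a hypothetical script, not the repo's run.sh; note that bash reports `exit -1` as status 255):

```bash
#!/bin/bash
# Hypothetical illustration of the stage-gating + fail-fast idiom.
stage=0       # first step to run
stop_stage=2  # last step to run

run_step() {  # stand-in for a real step such as ./local/train.sh
    echo "running: $1"
}

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_step "preprocess" || exit -1   # abort the whole pipeline on failure
fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_step "train" || exit -1
fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    run_step "synthesize" || exit -1
fi
```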

@@ -99,9 +99,9 @@ pwg_baker_ckpt_0.4
 ```
 `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can use stage `0-4` to select the vocoder to use {`pwgan`, `multi band melgan`, `style melgan`, ` hifigan`, `wavernn`}
+The last number controls the vocoder model during synthesis, which can use `0-4` to select the vocoder in {`pwgan`, `multi band melgan`, `style melgan`, ` hifigan`, `wavernn`}
 ```text
 usage: synthesize.py [-h]
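With the new calling convention the vocoder is picked by a trailing positional argument instead of a `--stage` flag. As a usage example (hypothetical invocation, same positional arguments as above), passing `3` should select `hifigan` rather than the default `pwgan`:

```bash
# 0=pwgan, 1=multi band melgan, 2=style melgan, 3=hifigan, 4=wavernn
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 3
```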
@@ -150,9 +150,9 @@ optional arguments:
 `./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can use stage `0,1,3,4` to select the vocoder to use{`pwgan`, `multi band melgan`, `hifigan`, `wavernn`}
+The last number controls the vocoder model during synthesis, which can use `0,1,3,4` to select the vocoder in {`pwgan`, `multi band melgan`, `hifigan`, `wavernn`}
 ```text
 usage: synthesize_e2e.py [-h]
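The end-to-end script accepts the same trailing argument, but per the text above only `0,1,3,4` are available (`style melgan` is skipped). For example, `4` should select `wavernn` (hypothetical invocation):

```bash
# valid values here: 0=pwgan, 1=multi band melgan, 3=hifigan, 4=wavernn
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 4
```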

@@ -3,8 +3,8 @@
 config_path=$1
 train_output_path=$2
 ckpt_name=$3
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
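`${4:-0}` is bash default-value parameter expansion: it expands to the fourth positional argument if one is given, and to `0` otherwise. Because `stage` and `stop_stage` both take the same value, exactly one vocoder block in the script is entered. A small standalone sketch of the behaviour (hypothetical demo script, not part of the repo):

```bash
#!/bin/bash
# demo.sh -- hypothetical sketch of the ${4:-0} default + single-block selection
config_path=$1
train_output_path=$2
ckpt_name=$3
stage=${4:-0}       # 4th argument if given, otherwise 0
stop_stage=${4:-0}  # same value, so exactly one block below runs

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    echo "vocoder 0 (pwgan)"
fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "vocoder 1 (multi band melgan)"
fi

# ./demo.sh conf out ckpt      -> vocoder 0 (pwgan)
# ./demo.sh conf out ckpt 1    -> vocoder 1 (multi band melgan)
```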
@@ -21,7 +21,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
         --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 # for more GAN Vocoders
@@ -40,7 +40,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 # style melgan
@@ -58,7 +58,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
         --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 # hifigan
@@ -77,7 +77,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
         --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 # wavernn
@@ -96,5 +96,5 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
         --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi

@@ -4,8 +4,8 @@ config_path=$1
 train_output_path=$2
 ckpt_name=$3
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 # TODO: the dynamic-to-static (dy2static) tacotron2 output is not as loud as the dynamic-graph result; most likely some function in decode is not aligned between dynamic and static graphs
 # pwgan
@@ -25,7 +25,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
@@ -47,7 +47,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
 # the pretrained models haven't release now
@@ -68,7 +68,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
         --lang=zh \
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
         # --inference_dir=${train_output_path}/inference
 fi
@@ -90,7 +90,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
 # wavernn
@@ -111,5 +111,5 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi

@@ -27,15 +27,15 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 fi
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    # synthesize, vocoder is pwgan by default stage 0
-    # stage 1-4 to select the vocoder to use {multi band melgan, style melgan, hifigan, wavernn}
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize, vocoder is pwgan by default 0
+    # use 1-4 to select the vocoder in {multi band melgan, style melgan, hifigan, wavernn}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, vocoder is pwgan by default stage 0
-    # stage 1,3,4 to select the vocoder to use {multi band melgan, hifigan, wavernn}
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e, vocoder is pwgan by default 0
+    # use 1,3,4 to select the vocoder in {multi band melgan, hifigan, wavernn}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
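Note the two kinds of numbers here: `stage`/`stop_stage` in run.sh gate the pipeline steps (train, average, synthesize, synthesize_e2e, ...), while the trailing `0` passed to the `local/` scripts selects the vocoder. Assuming run.sh parses `--stage`/`--stop_stage` options the way PaddleSpeech example scripts usually do (an assumption; the option parsing is not shown in this diff), running only the end-to-end synthesis step with the default `pwgan` vocoder would look roughly like:

```bash
# hypothetical invocation; assumes run.sh supports --stage/--stop_stage
./run.sh --stage 3 --stop_stage 3
```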

@@ -107,9 +107,9 @@ pwg_baker_ckpt_0.4
 ```
 `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis. The parameter values range from `0-4`, corresponding to the following five vocoder models: `pwgan`, `multi band melgan`, `style melgan`, `hifigan`, and `wavernn`.
+The last number controls the vocoder model during synthesis, which can use `0-4` to select the vocoder in {`pwgan`, `multi band melgan`, `style melgan`, ` hifigan`, `wavernn`}
 ```text
 usage: synthesize.py [-h]
@@ -157,9 +157,9 @@ optional arguments:
 ```
 `./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis. The parameter values are {`0,1,3,4`}, corresponding to the following four vocoder models: `pwgan`, `multi band melgan`, `hifigan`, and `wavernn`.
+The last number controls the vocoder model during synthesis, which can use `0,1,3,4` to select the vocoder in {`pwgan`, `multi band melgan`, `hifigan`, `wavernn`}
 ```text
 usage: synthesize_e2e.py [-h]

@@ -113,9 +113,9 @@ pwg_baker_ckpt_0.4
 `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py` to synthesize waveforms from `metadata.jsonl`.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-The `--stage` argument controls the vocoder model used during synthesis. It takes values in `0-4`, corresponding to the following five vocoders: `pwgan`, `multi band melgan`, `style melgan`, `hifigan`, and `wavernn`.
+The last positional argument `0` controls the vocoder model used during synthesis. It takes values in `0-4`, corresponding to the following five vocoders: `pwgan`, `multi band melgan`, `style melgan`, `hifigan`, and `wavernn`.
 ```text
 usage: synthesize.py [-h]
@@ -164,9 +164,9 @@ optional arguments:
 `./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py` to synthesize waveforms from a text file.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-The `--stage` argument controls the vocoder model used during synthesis. It takes values in {`0,1,3,4`}, corresponding to the following four vocoders: `pwgan`, `multi band melgan`, `hifigan`, and `wavernn`.
+The last positional argument `0` controls the vocoder model used during synthesis. It takes values in {`0,1,3,4`}, corresponding to the following four vocoders: `pwgan`, `multi band melgan`, `hifigan`, and `wavernn`.
 ```text
 usage: synthesize_e2e.py [-h]

@@ -3,8 +3,8 @@
 config_path=$1
 train_output_path=$2
 ckpt_name=$3
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
@@ -21,7 +21,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
         --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 # for more GAN Vocoders
@@ -40,7 +40,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 # style melgan
@@ -58,7 +58,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
         --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 # hifigan
@@ -77,7 +77,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
         --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 # wavernn
@@ -96,5 +96,5 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
         --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi

@@ -3,9 +3,8 @@
 config_path=$1
 train_output_path=$2
 ckpt_name=$3
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
@@ -24,7 +23,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
 # for more GAN Vocoders
@@ -45,7 +44,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
 # the pretrained models haven't release now
@@ -66,7 +65,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
         --lang=zh \
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
         # --inference_dir=${train_output_path}/inference
 fi
@@ -88,7 +87,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
@@ -110,5 +109,5 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi

@@ -27,15 +27,15 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 fi
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    # synthesize, vocoder is pwgan by default stage 0
-    # use stage 1-4 to select the vocoder to use {multi band melgan, style melgan, hifigan, wavernn}
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize, vocoder is pwgan by default 0
+    # use 1-4 to select the vocoder in {multi band melgan, style melgan, hifigan, wavernn}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, vocoder is pwgan by default stage 0
-    # use stage 1,3,4 to select the vocoder to use {multi band melgan, hifigan, wavernn}
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e, vocoder is pwgan by default 0
+    # use 1,3,4 to select the vocoder in {multi band melgan, hifigan, wavernn}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
