From c1395e3a051b65d6503dd1a67b3708c6c1348120 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Thu, 14 Jul 2022 11:55:15 +0000 Subject: [PATCH] add synthesize for ernie_sat aishell3 and aishell3_vctk, test=tts --- .../aishell3/ernie_sat/local/synthesize.sh | 43 ++++++++++++++++++- examples/aishell3/tts3/local/synthesize.sh | 2 +- .../ernie_sat/local/synthesize.sh | 43 ++++++++++++++++++- 3 files changed, 85 insertions(+), 3 deletions(-) diff --git a/examples/aishell3/ernie_sat/local/synthesize.sh b/examples/aishell3/ernie_sat/local/synthesize.sh index cc1f786e..3e907427 100755 --- a/examples/aishell3/ernie_sat/local/synthesize.sh +++ b/examples/aishell3/ernie_sat/local/synthesize.sh @@ -1 +1,42 @@ -#!/bin/bash \ No newline at end of file +#!/bin/bash + +config_path=$1 +train_output_path=$2 +ckpt_name=$3 + +stage=1 +stop_stage=1 + +# pwgan +if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/synthesize.py \ + --erniesat_config=${config_path} \ + --erniesat_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --erniesat_stat=dump/train/speech_stats.npy \ + --voc=pwgan_aishell3 \ + --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \ + --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \ + --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi + +# hifigan +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/synthesize.py \ + --erniesat_config=${config_path} \ + --erniesat_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --erniesat_stat=dump/train/speech_stats.npy \ + --voc=hifigan_aishell3 \ + --voc_config=hifigan_aishell3_ckpt_0.2.0/default.yaml \ + --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \ + --voc_stat=hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi diff --git a/examples/aishell3/tts3/local/synthesize.sh b/examples/aishell3/tts3/local/synthesize.sh index d3978833..9134e042 100755 --- a/examples/aishell3/tts3/local/synthesize.sh +++ b/examples/aishell3/tts3/local/synthesize.sh @@ -37,7 +37,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am_stat=dump/train/speech_stats.npy \ --voc=hifigan_aishell3 \ --voc_config=hifigan_aishell3_ckpt_0.2.0/default.yaml \ - --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pd \ + --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \ --voc_stat=hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \ --test_metadata=dump/test/norm/metadata.jsonl \ --output_dir=${train_output_path}/test \ diff --git a/examples/aishell3_vctk/ernie_sat/local/synthesize.sh b/examples/aishell3_vctk/ernie_sat/local/synthesize.sh index cc1f786e..3e907427 100755 --- a/examples/aishell3_vctk/ernie_sat/local/synthesize.sh +++ b/examples/aishell3_vctk/ernie_sat/local/synthesize.sh @@ -1 +1,42 @@ -#!/bin/bash \ No newline at end of file +#!/bin/bash + +config_path=$1 +train_output_path=$2 +ckpt_name=$3 + +stage=1 +stop_stage=1 + +# pwgan +if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/synthesize.py \ + --erniesat_config=${config_path} \ + --erniesat_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --erniesat_stat=dump/train/speech_stats.npy \ + --voc=pwgan_aishell3 \ + --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \ + --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \ + --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi + +# hifigan +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/synthesize.py \ + --erniesat_config=${config_path} \ + --erniesat_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --erniesat_stat=dump/train/speech_stats.npy \ + --voc=hifigan_aishell3 \ + --voc_config=hifigan_aishell3_ckpt_0.2.0/default.yaml \ + --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \ + --voc_stat=hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi