[TTS]soft link for shell in example, add skip_copy_wave in norm stage of G… (#2851)
soft link for shell in example, add skip_copy_wave in norm stage of GANVocoders to save disk
pull/2852/head
parent
140aed4b54
commit
2b01e40525
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=fastspeech2
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/tts3/path.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=tacotron2
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/tts0/path.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=2 \
|
|
||||||
--phones-dict=dump/phone_id_map.txt \
|
|
||||||
--voice-cloning=True
|
|
@ -0,0 +1 @@
|
|||||||
|
../../vc0/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=fastspeech2
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/tts3/path.sh
|
@ -1,20 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
ckpt_name=$3
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../synthesize.py \
|
|
||||||
--am=fastspeech2_aishell3 \
|
|
||||||
--am_config=${config_path} \
|
|
||||||
--am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
|
|
||||||
--am_stat=dump/train/speech_stats.npy \
|
|
||||||
--voc=pwgan_aishell3 \
|
|
||||||
--voc_config=pwg_aishell3_ckpt_0.5/default.yaml \
|
|
||||||
--voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \
|
|
||||||
--voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \
|
|
||||||
--test_metadata=dump/test/norm/metadata.jsonl \
|
|
||||||
--output_dir=${train_output_path}/test \
|
|
||||||
--phones_dict=dump/phone_id_map.txt \
|
|
||||||
--speaker_dict=dump/speaker_id_map.txt \
|
|
||||||
--voice-cloning=True
|
|
@ -0,0 +1 @@
|
|||||||
|
../../vc1/local/synthesize.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=2 \
|
|
||||||
--phones-dict=dump/phone_id_map.txt \
|
|
||||||
--voice-cloning=True
|
|
@ -0,0 +1 @@
|
|||||||
|
../../vc0/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=fastspeech2
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/tts3/path.sh
|
@ -1,14 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
ckpt_name=$3
|
|
||||||
|
|
||||||
FLAGS_allocator_strategy=naive_best_fit \
|
|
||||||
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
|
|
||||||
python3 ${BIN_DIR}/../synthesize.py \
|
|
||||||
--config=${config_path} \
|
|
||||||
--checkpoint=${train_output_path}/checkpoints/${ckpt_name} \
|
|
||||||
--test-metadata=dump/test/norm/metadata.jsonl \
|
|
||||||
--output-dir=${train_output_path}/test \
|
|
||||||
--generator-type=pwgan
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/voc1/local/synthesize.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
FLAGS_cudnn_exhaustive_search=true \
|
|
||||||
FLAGS_conv_workspace_size_limit=4000 \
|
|
||||||
python ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/voc1/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=parallelwave_gan
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/gan_vocoder/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/voc1/path.sh
|
@ -1,55 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
stage=0
|
|
||||||
stop_stage=100
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
|
|
||||||
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
|
|
||||||
# get durations from MFA's result
|
|
||||||
echo "Generate durations.txt from MFA results ..."
|
|
||||||
python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
|
|
||||||
--inputdir=./aishell3_alignment_tone \
|
|
||||||
--output=durations.txt \
|
|
||||||
--config=${config_path}
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
|
|
||||||
# extract features
|
|
||||||
echo "Extract features ..."
|
|
||||||
python3 ${BIN_DIR}/../preprocess.py \
|
|
||||||
--rootdir=~/datasets/data_aishell3/ \
|
|
||||||
--dataset=aishell3 \
|
|
||||||
--dumpdir=dump \
|
|
||||||
--dur-file=durations.txt \
|
|
||||||
--config=${config_path} \
|
|
||||||
--cut-sil=True \
|
|
||||||
--num-cpu=20
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
|
|
||||||
# get features' stats(mean and std)
|
|
||||||
echo "Get features' stats ..."
|
|
||||||
python3 ${MAIN_ROOT}/utils/compute_statistics.py \
|
|
||||||
--metadata=dump/train/raw/metadata.jsonl \
|
|
||||||
--field-name="feats"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
|
|
||||||
# normalize, dev and test should use train's stats
|
|
||||||
echo "Normalize ..."
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/train/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/train/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/dev/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/dev/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/test/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/test/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
fi
|
|
@ -0,0 +1 @@
|
|||||||
|
../../voc1/local/preprocess.sh
|
@ -1,14 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
ckpt_name=$3
|
|
||||||
|
|
||||||
FLAGS_allocator_strategy=naive_best_fit \
|
|
||||||
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
|
|
||||||
python3 ${BIN_DIR}/../synthesize.py \
|
|
||||||
--config=${config_path} \
|
|
||||||
--checkpoint=${train_output_path}/checkpoints/${ckpt_name} \
|
|
||||||
--test-metadata=dump/test/norm/metadata.jsonl \
|
|
||||||
--output-dir=${train_output_path}/test \
|
|
||||||
--generator-type=hifigan
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/voc5/local/synthesize.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
FLAGS_cudnn_exhaustive_search=true \
|
|
||||||
FLAGS_conv_workspace_size_limit=4000 \
|
|
||||||
python ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/voc1/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=hifigan
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/gan_vocoder/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/voc5/path.sh
|
@ -1,25 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
ckpt_name=$3
|
|
||||||
|
|
||||||
stage=0
|
|
||||||
stop_stage=0
|
|
||||||
|
|
||||||
# hifigan
|
|
||||||
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
|
|
||||||
FLAGS_allocator_strategy=naive_best_fit \
|
|
||||||
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
|
|
||||||
python3 ${BIN_DIR}/synthesize.py \
|
|
||||||
--erniesat_config=${config_path} \
|
|
||||||
--erniesat_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
|
|
||||||
--erniesat_stat=dump/train/speech_stats.npy \
|
|
||||||
--voc=hifigan_aishell3 \
|
|
||||||
--voc_config=hifigan_aishell3_ckpt_0.2.0/default.yaml \
|
|
||||||
--voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \
|
|
||||||
--voc_stat=hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \
|
|
||||||
--test_metadata=dump/test/norm/metadata.jsonl \
|
|
||||||
--output_dir=${train_output_path}/test \
|
|
||||||
--phones_dict=dump/phone_id_map.txt
|
|
||||||
fi
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../aishell3/ernie_sat/local/synthesize.sh
|
@ -1,12 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=8 \
|
|
||||||
--phones-dict=dump/phone_id_map.txt
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../aishell3/ernie_sat/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=ernie_sat
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../aishell3/ernie_sat/path.sh
|
@ -1,64 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
source path.sh
|
|
||||||
|
|
||||||
gpus=0
|
|
||||||
stage=0
|
|
||||||
stop_stage=100
|
|
||||||
|
|
||||||
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
|
|
||||||
|
|
||||||
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
|
|
||||||
python3 ${MAIN_ROOT}/paddlespeech/t2s/exps/fastspeech2/gen_gta_mel.py \
|
|
||||||
--fastspeech2-config=fastspeech2_nosil_baker_ckpt_0.4/default.yaml \
|
|
||||||
--fastspeech2-checkpoint=fastspeech2_nosil_baker_ckpt_0.4/snapshot_iter_76000.pdz \
|
|
||||||
--fastspeech2-stat=fastspeech2_nosil_baker_ckpt_0.4/speech_stats.npy \
|
|
||||||
--dur-file=durations.txt \
|
|
||||||
--output-dir=dump_finetune \
|
|
||||||
--phones-dict=fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt \
|
|
||||||
--dataset=baker \
|
|
||||||
--rootdir=~/datasets/BZNSYP/
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
|
|
||||||
python3 ${MAIN_ROOT}/utils/link_wav.py \
|
|
||||||
--old-dump-dir=dump \
|
|
||||||
--dump-dir=dump_finetune
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
|
|
||||||
# get features' stats(mean and std)
|
|
||||||
echo "Get features' stats ..."
|
|
||||||
cp dump/train/feats_stats.npy dump_finetune/train/
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
|
|
||||||
# normalize, dev and test should use train's stats
|
|
||||||
echo "Normalize ..."
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump_finetune/train/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump_finetune/train/norm \
|
|
||||||
--stats=dump_finetune/train/feats_stats.npy
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump_finetune/dev/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump_finetune/dev/norm \
|
|
||||||
--stats=dump_finetune/train/feats_stats.npy
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump_finetune/test/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump_finetune/test/norm \
|
|
||||||
--stats=dump_finetune/train/feats_stats.npy
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
|
|
||||||
CUDA_VISIBLE_DEVICES=${gpus} \
|
|
||||||
FLAGS_cudnn_exhaustive_search=true \
|
|
||||||
FLAGS_conv_workspace_size_limit=4000 \
|
|
||||||
python ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump_finetune/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump_finetune/dev/norm/metadata.jsonl \
|
|
||||||
--config=conf/finetune.yaml \
|
|
||||||
--output-dir=exp/finetune \
|
|
||||||
--ngpu=1
|
|
||||||
fi
|
|
@ -0,0 +1 @@
|
|||||||
|
../voc5/finetune.sh
|
@ -1,55 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
stage=0
|
|
||||||
stop_stage=100
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
|
|
||||||
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
|
|
||||||
# get durations from MFA's result
|
|
||||||
echo "Generate durations.txt from MFA results ..."
|
|
||||||
python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
|
|
||||||
--inputdir=./baker_alignment_tone \
|
|
||||||
--output=durations.txt \
|
|
||||||
--config=${config_path}
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
|
|
||||||
# extract features
|
|
||||||
echo "Extract features ..."
|
|
||||||
python3 ${BIN_DIR}/../preprocess.py \
|
|
||||||
--rootdir=~/datasets/BZNSYP/ \
|
|
||||||
--dataset=baker \
|
|
||||||
--dumpdir=dump \
|
|
||||||
--dur-file=durations.txt \
|
|
||||||
--config=${config_path} \
|
|
||||||
--cut-sil=True \
|
|
||||||
--num-cpu=20
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
|
|
||||||
# get features' stats(mean and std)
|
|
||||||
echo "Get features' stats ..."
|
|
||||||
python3 ${MAIN_ROOT}/utils/compute_statistics.py \
|
|
||||||
--metadata=dump/train/raw/metadata.jsonl \
|
|
||||||
--field-name="feats"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
|
|
||||||
# normalize, dev and test should use train's stats
|
|
||||||
echo "Normalize ..."
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/train/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/train/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/dev/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/dev/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/test/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/test/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
fi
|
|
@ -0,0 +1 @@
|
|||||||
|
../../voc1/local/preprocess.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
FLAGS_cudnn_exhaustive_search=true \
|
|
||||||
FLAGS_conv_workspace_size_limit=4000 \
|
|
||||||
python ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1
|
|
@ -0,0 +1 @@
|
|||||||
|
../../voc1/local/train.sh
|
@ -1,55 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
stage=0
|
|
||||||
stop_stage=100
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
|
|
||||||
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
|
|
||||||
# get durations from MFA's result
|
|
||||||
echo "Generate durations.txt from MFA results ..."
|
|
||||||
python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
|
|
||||||
--inputdir=./baker_alignment_tone \
|
|
||||||
--output=durations.txt \
|
|
||||||
--config=${config_path}
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
|
|
||||||
# extract features
|
|
||||||
echo "Extract features ..."
|
|
||||||
python3 ${BIN_DIR}/../preprocess.py \
|
|
||||||
--rootdir=~/datasets/BZNSYP/ \
|
|
||||||
--dataset=baker \
|
|
||||||
--dumpdir=dump \
|
|
||||||
--dur-file=durations.txt \
|
|
||||||
--config=${config_path} \
|
|
||||||
--cut-sil=True \
|
|
||||||
--num-cpu=20
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
|
|
||||||
# get features' stats(mean and std)
|
|
||||||
echo "Get features' stats ..."
|
|
||||||
python3 ${MAIN_ROOT}/utils/compute_statistics.py \
|
|
||||||
--metadata=dump/train/raw/metadata.jsonl \
|
|
||||||
--field-name="feats"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
|
|
||||||
# normalize, dev and test should use train's stats
|
|
||||||
echo "Normalize ..."
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/train/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/train/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/dev/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/dev/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/test/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/test/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
fi
|
|
@ -0,0 +1 @@
|
|||||||
|
../../voc1/local/preprocess.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
FLAGS_cudnn_exhaustive_search=true \
|
|
||||||
FLAGS_conv_workspace_size_limit=4000 \
|
|
||||||
python ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1
|
|
@ -0,0 +1 @@
|
|||||||
|
../../voc1/local/train.sh
|
@ -1,55 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
stage=0
|
|
||||||
stop_stage=100
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
|
|
||||||
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
|
|
||||||
# get durations from MFA's result
|
|
||||||
echo "Generate durations.txt from MFA results ..."
|
|
||||||
python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
|
|
||||||
--inputdir=./baker_alignment_tone \
|
|
||||||
--output=durations.txt \
|
|
||||||
--config=${config_path}
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
|
|
||||||
# extract features
|
|
||||||
echo "Extract features ..."
|
|
||||||
python3 ${BIN_DIR}/../preprocess.py \
|
|
||||||
--rootdir=~/datasets/BZNSYP/ \
|
|
||||||
--dataset=baker \
|
|
||||||
--dumpdir=dump \
|
|
||||||
--dur-file=durations.txt \
|
|
||||||
--config=${config_path} \
|
|
||||||
--cut-sil=True \
|
|
||||||
--num-cpu=20
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
|
|
||||||
# get features' stats(mean and std)
|
|
||||||
echo "Get features' stats ..."
|
|
||||||
python3 ${MAIN_ROOT}/utils/compute_statistics.py \
|
|
||||||
--metadata=dump/train/raw/metadata.jsonl \
|
|
||||||
--field-name="feats"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
|
|
||||||
# normalize, dev and test should use train's stats
|
|
||||||
echo "Normalize ..."
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/train/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/train/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/dev/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/dev/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/test/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/test/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
fi
|
|
@ -0,0 +1 @@
|
|||||||
|
../../voc1/local/preprocess.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
FLAGS_cudnn_exhaustive_search=true \
|
|
||||||
FLAGS_conv_workspace_size_limit=4000 \
|
|
||||||
python ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1
|
|
@ -0,0 +1 @@
|
|||||||
|
../../voc1/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
FLAGS_cudnn_exhaustive_search=true \
|
|
||||||
FLAGS_conv_workspace_size_limit=4000 \
|
|
||||||
python ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1
|
|
@ -0,0 +1 @@
|
|||||||
|
../../voc1/local/train.sh
|
@ -1,12 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1 \
|
|
||||||
--phones-dict=dump/phone_id_map.txt
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/tts0/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=tacotron2
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/tts0/path.sh
|
@ -1,12 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1 \
|
|
||||||
--phones-dict=dump/phone_id_map.txt
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/tts3/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=fastspeech2
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/tts3/path.sh
|
@ -1,14 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
ckpt_name=$3
|
|
||||||
|
|
||||||
FLAGS_allocator_strategy=naive_best_fit \
|
|
||||||
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
|
|
||||||
python3 ${BIN_DIR}/../synthesize.py \
|
|
||||||
--config=${config_path} \
|
|
||||||
--checkpoint=${train_output_path}/checkpoints/${ckpt_name} \
|
|
||||||
--test-metadata=dump/test/norm/metadata.jsonl \
|
|
||||||
--output-dir=${train_output_path}/test \
|
|
||||||
--generator-type=pwgan
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/voc1/local/synthesize.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
FLAGS_cudnn_exhaustive_search=true \
|
|
||||||
FLAGS_conv_workspace_size_limit=4000 \
|
|
||||||
python ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/voc1/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=parallelwave_gan
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/gan_vocoder/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/voc1/path.sh
|
@ -1,55 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
stage=0
|
|
||||||
stop_stage=100
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
|
|
||||||
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
|
|
||||||
# get durations from MFA's result
|
|
||||||
echo "Generate durations.txt from MFA results ..."
|
|
||||||
python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
|
|
||||||
--inputdir=./ljspeech_alignment \
|
|
||||||
--output=durations.txt \
|
|
||||||
--config=${config_path}
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
|
|
||||||
# extract features
|
|
||||||
echo "Extract features ..."
|
|
||||||
python3 ${BIN_DIR}/../preprocess.py \
|
|
||||||
--rootdir=~/datasets/LJSpeech-1.1/ \
|
|
||||||
--dataset=ljspeech \
|
|
||||||
--dumpdir=dump \
|
|
||||||
--dur-file=durations.txt \
|
|
||||||
--config=${config_path} \
|
|
||||||
--cut-sil=True \
|
|
||||||
--num-cpu=20
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
|
|
||||||
# get features' stats(mean and std)
|
|
||||||
echo "Get features' stats ..."
|
|
||||||
python3 ${MAIN_ROOT}/utils/compute_statistics.py \
|
|
||||||
--metadata=dump/train/raw/metadata.jsonl \
|
|
||||||
--field-name="feats"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
|
|
||||||
# normalize, dev and test should use train's stats
|
|
||||||
echo "Normalize ..."
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/train/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/train/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/dev/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/dev/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/test/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/test/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
fi
|
|
@ -0,0 +1 @@
|
|||||||
|
../../voc1/local/preprocess.sh
|
@ -1,14 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
ckpt_name=$3
|
|
||||||
|
|
||||||
FLAGS_allocator_strategy=naive_best_fit \
|
|
||||||
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
|
|
||||||
python3 ${BIN_DIR}/../synthesize.py \
|
|
||||||
--config=${config_path} \
|
|
||||||
--checkpoint=${train_output_path}/checkpoints/${ckpt_name} \
|
|
||||||
--test-metadata=dump/test/norm/metadata.jsonl \
|
|
||||||
--output-dir=${train_output_path}/test \
|
|
||||||
--generator-type=hifigan
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/voc5/local/synthesize.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
FLAGS_cudnn_exhaustive_search=true \
|
|
||||||
FLAGS_conv_workspace_size_limit=4000 \
|
|
||||||
python ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/voc1/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=hifigan
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/gan_vocoder/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/voc5/path.sh
|
@ -1,12 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=8 \
|
|
||||||
--phones-dict=dump/phone_id_map.txt
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../aishell3/ernie_sat/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=ernie_sat
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../aishell3/ernie_sat/path.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1 \
|
|
||||||
--phones-dict=dump/phone_id_map.txt \
|
|
||||||
--speaker-dict=dump/speaker_id_map.txt
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../aishell3/tts3/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=fastspeech2
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/tts3/path.sh
|
@ -1,14 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
ckpt_name=$3
|
|
||||||
|
|
||||||
FLAGS_allocator_strategy=naive_best_fit \
|
|
||||||
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
|
|
||||||
python3 ${BIN_DIR}/../synthesize.py \
|
|
||||||
--config=${config_path} \
|
|
||||||
--checkpoint=${train_output_path}/checkpoints/${ckpt_name} \
|
|
||||||
--test-metadata=dump/test/norm/metadata.jsonl \
|
|
||||||
--output-dir=${train_output_path}/test \
|
|
||||||
--generator-type=pwgan
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/voc1/local/synthesize.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
FLAGS_cudnn_exhaustive_search=true \
|
|
||||||
FLAGS_conv_workspace_size_limit=4000 \
|
|
||||||
python ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/voc1/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=parallelwave_gan
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/gan_vocoder/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/voc1/path.sh
|
@ -1,55 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
stage=0
|
|
||||||
stop_stage=100
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
|
|
||||||
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
|
|
||||||
# get durations from MFA's result
|
|
||||||
echo "Generate durations.txt from MFA results ..."
|
|
||||||
python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
|
|
||||||
--inputdir=./vctk_alignment \
|
|
||||||
--output=durations.txt \
|
|
||||||
--config=${config_path}
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
|
|
||||||
# extract features
|
|
||||||
echo "Extract features ..."
|
|
||||||
python3 ${BIN_DIR}/../preprocess.py \
|
|
||||||
--rootdir=~/datasets/VCTK-Corpus-0.92/ \
|
|
||||||
--dataset=vctk \
|
|
||||||
--dumpdir=dump \
|
|
||||||
--dur-file=durations.txt \
|
|
||||||
--config=${config_path} \
|
|
||||||
--cut-sil=True \
|
|
||||||
--num-cpu=20
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
|
|
||||||
# get features' stats(mean and std)
|
|
||||||
echo "Get features' stats ..."
|
|
||||||
python3 ${MAIN_ROOT}/utils/compute_statistics.py \
|
|
||||||
--metadata=dump/train/raw/metadata.jsonl \
|
|
||||||
--field-name="feats"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
|
|
||||||
# normalize, dev and test should use train's stats
|
|
||||||
echo "Normalize ..."
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/train/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/train/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/dev/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/dev/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
|
|
||||||
python3 ${BIN_DIR}/../normalize.py \
|
|
||||||
--metadata=dump/test/raw/metadata.jsonl \
|
|
||||||
--dumpdir=dump/test/norm \
|
|
||||||
--stats=dump/train/feats_stats.npy
|
|
||||||
fi
|
|
@ -0,0 +1 @@
|
|||||||
|
../../voc1/local/preprocess.sh
|
@ -1,14 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
ckpt_name=$3
|
|
||||||
|
|
||||||
FLAGS_allocator_strategy=naive_best_fit \
|
|
||||||
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
|
|
||||||
python3 ${BIN_DIR}/../synthesize.py \
|
|
||||||
--config=${config_path} \
|
|
||||||
--checkpoint=${train_output_path}/checkpoints/${ckpt_name} \
|
|
||||||
--test-metadata=dump/test/norm/metadata.jsonl \
|
|
||||||
--output-dir=${train_output_path}/test \
|
|
||||||
--generator-type=hifigan
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/voc5/local/synthesize.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
config_path=$1
|
|
||||||
train_output_path=$2
|
|
||||||
|
|
||||||
FLAGS_cudnn_exhaustive_search=true \
|
|
||||||
FLAGS_conv_workspace_size_limit=4000 \
|
|
||||||
python ${BIN_DIR}/train.py \
|
|
||||||
--train-metadata=dump/train/norm/metadata.jsonl \
|
|
||||||
--dev-metadata=dump/dev/norm/metadata.jsonl \
|
|
||||||
--config=${config_path} \
|
|
||||||
--output-dir=${train_output_path} \
|
|
||||||
--ngpu=1
|
|
@ -0,0 +1 @@
|
|||||||
|
../../../csmsc/voc1/local/train.sh
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
export MAIN_ROOT=`realpath ${PWD}/../../../`
|
|
||||||
|
|
||||||
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
|
|
||||||
export LC_ALL=C
|
|
||||||
|
|
||||||
export PYTHONDONTWRITEBYTECODE=1
|
|
||||||
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
|
||||||
export PYTHONIOENCODING=UTF-8
|
|
||||||
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
|
|
||||||
|
|
||||||
MODEL=hifigan
|
|
||||||
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/gan_vocoder/${MODEL}
|
|
@ -0,0 +1 @@
|
|||||||
|
../../csmsc/voc5/path.sh
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue