commit
1973d7c941
@ -0,0 +1,33 @@
|
||||
#!/bin/bash
# Runs inference.py with the speedyspeech_csmsc acoustic model and a GAN
# vocoder, passing `--device mlu`.
#
# Usage: <this script> <train_output_path>
#   train_output_path  experiment directory; <train_output_path>/inference is
#                      passed as --inference_dir and
#                      <train_output_path>/pd_infer_out as --output_dir.
# BIN_DIR is read from the environment; it is not set in this script.

train_output_path=$1

# A stage runs when its index lies within [stage, stop_stage].
stage=0
stop_stage=0

# Invoke inference.py for the vocoder named by $1. All expansions are quoted
# so paths containing whitespace or glob characters are passed intact.
run_inference() {
    local voc=$1
    python3 "${BIN_DIR}/../inference.py" \
        --inference_dir="${train_output_path}/inference" \
        --am=speedyspeech_csmsc \
        --voc="${voc}" \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/pd_infer_out" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --device mlu
}

# for more GAN Vocoders
# multi band melgan
if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    run_inference mb_melgan_csmsc
fi

# hifigan
if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
    run_inference hifigan_csmsc
fi
|
@ -0,0 +1,46 @@
|
||||
#!/bin/bash
# Runs inference.py with the speedyspeech_csmsc acoustic model and a GAN
# vocoder, passing `--device npu`.
#
# Usage: <this script> <train_output_path>
#   train_output_path  experiment directory; <train_output_path>/inference is
#                      passed as --inference_dir and
#                      <train_output_path>/pd_infer_out as --output_dir.
# BIN_DIR is read from the environment; it is not set in this script.

train_output_path=$1

# A stage runs when its index lies within [stage, stop_stage].
stage=0
stop_stage=0

# Invoke inference.py for the vocoder named by $1. All expansions are quoted
# so paths containing whitespace or glob characters are passed intact.
run_inference() {
    local voc=$1
    python3 "${BIN_DIR}/../inference.py" \
        --inference_dir="${train_output_path}/inference" \
        --am=speedyspeech_csmsc \
        --voc="${voc}" \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/pd_infer_out" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --device npu
}

# pwgan
if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    run_inference pwgan_csmsc
fi

# for more GAN Vocoders
# multi band melgan
if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
    run_inference mb_melgan_csmsc
fi

# hifigan
if [[ ${stage} -le 2 && ${stop_stage} -ge 2 ]]; then
    run_inference hifigan_csmsc
fi
|
@ -0,0 +1,99 @@
|
||||
#!/bin/bash
# Runs synthesize_e2e.py with the speedyspeech_csmsc acoustic model and one of
# several vocoders, requesting `--ngpu=0 --nmlu=1`.
#
# Usage: <this script> <config_path> <train_output_path> <ckpt_name>
#   config_path        passed as --am_config
#   train_output_path  experiment directory; the acoustic-model checkpoint is
#                      <train_output_path>/checkpoints/<ckpt_name> and
#                      --output_dir is <train_output_path>/test_e2e
#   ckpt_name          acoustic-model checkpoint file name
# BIN_DIR is read from the environment; it is not set in this script.

config_path=$1
train_output_path=$2
ckpt_name=$3

# A stage runs when its index lies within [stage, stop_stage].
stage=0
stop_stage=0

# Invoke synthesize_e2e.py for one vocoder.
#   $1 vocoder name (--voc)
#   $2 vocoder config (--voc_config)
#   $3 vocoder checkpoint (--voc_ckpt)
#   $4 vocoder statistics (--voc_stat)
#   $5.. optional extra flags, inserted just before --ngpu
# All expansions are quoted so paths with whitespace are passed intact.
run_synthesize_e2e() {
    local voc=$1 voc_config=$2 voc_ckpt=$3 voc_stat=$4
    shift 4
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc="${voc}" \
        --voc_config="${voc_config}" \
        --voc_ckpt="${voc_ckpt}" \
        --voc_stat="${voc_stat}" \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        "$@" \
        --ngpu=0 \
        --nmlu=1
}

# for more GAN Vocoders
# multi band melgan
if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    run_synthesize_e2e mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy
fi

# the pretrained models haven't been released yet
# style melgan
# style melgan's Dygraph to Static Graph is not ready now
# (--inference_dir=${train_output_path}/inference is left out for this vocoder)
if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
    run_synthesize_e2e style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy
fi

# hifigan
if [[ ${stage} -le 2 && ${stop_stage} -ge 2 ]]; then
    run_synthesize_e2e hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --inference_dir="${train_output_path}/inference"
fi

# wavernn
if [[ ${stage} -le 3 && ${stop_stage} -ge 3 ]]; then
    echo "in wavernn syn_e2e"
    run_synthesize_e2e wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --inference_dir="${train_output_path}/inference"
fi
|
@ -0,0 +1,124 @@
|
||||
#!/bin/bash
# Runs synthesize_e2e.py with the speedyspeech_csmsc acoustic model and one of
# several vocoders, requesting `--ngpu=0 --nnpu=1`.
#
# Usage: <this script> <config_path> <train_output_path> <ckpt_name>
#   config_path        passed as --am_config
#   train_output_path  experiment directory; the acoustic-model checkpoint is
#                      <train_output_path>/checkpoints/<ckpt_name> and
#                      --output_dir is <train_output_path>/test_e2e
#   ckpt_name          acoustic-model checkpoint file name
# BIN_DIR is read from the environment; it is not set in this script.

config_path=$1
train_output_path=$2
ckpt_name=$3

# A stage runs when its index lies within [stage, stop_stage].
stage=0
stop_stage=0

# Invoke synthesize_e2e.py for one vocoder.
#   $1 vocoder name (--voc)
#   $2 vocoder config (--voc_config)
#   $3 vocoder checkpoint (--voc_ckpt)
#   $4 vocoder statistics (--voc_stat)
#   $5.. optional extra flags, inserted just before --ngpu
# All expansions are quoted so paths with whitespace are passed intact.
run_synthesize_e2e() {
    local voc=$1 voc_config=$2 voc_ckpt=$3 voc_stat=$4
    shift 4
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc="${voc}" \
        --voc_config="${voc_config}" \
        --voc_ckpt="${voc_ckpt}" \
        --voc_stat="${voc_stat}" \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        "$@" \
        --ngpu=0 \
        --nnpu=1
}

# pwgan
if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    run_synthesize_e2e pwgan_csmsc \
        pwg_baker_ckpt_0.4/pwg_default.yaml \
        pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        pwg_baker_ckpt_0.4/pwg_stats.npy \
        --inference_dir="${train_output_path}/inference"
fi

# for more GAN Vocoders
# multi band melgan
if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
    run_synthesize_e2e mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --inference_dir="${train_output_path}/inference"
fi

# the pretrained models haven't been released yet
# style melgan
# style melgan's Dygraph to Static Graph is not ready now
# (--inference_dir=${train_output_path}/inference is left out for this vocoder)
if [[ ${stage} -le 2 && ${stop_stage} -ge 2 ]]; then
    run_synthesize_e2e style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy
fi

# hifigan
if [[ ${stage} -le 3 && ${stop_stage} -ge 3 ]]; then
    run_synthesize_e2e hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --inference_dir="${train_output_path}/inference"
fi

# wavernn
if [[ ${stage} -le 4 && ${stop_stage} -ge 4 ]]; then
    echo "in wavernn syn_e2e"
    run_synthesize_e2e wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --inference_dir="${train_output_path}/inference"
fi
|
@ -0,0 +1,90 @@
|
||||
#!/bin/bash
# Runs synthesize.py on dump/test/norm/metadata.jsonl with the
# speedyspeech_csmsc acoustic model and one of several vocoders, requesting
# `--ngpu=0 --nmlu=1`.
#
# Usage: <this script> <config_path> <train_output_path> <ckpt_name>
#   config_path        passed as --am_config
#   train_output_path  experiment directory; the acoustic-model checkpoint is
#                      <train_output_path>/checkpoints/<ckpt_name> and
#                      --output_dir is <train_output_path>/test
#   ckpt_name          acoustic-model checkpoint file name
# BIN_DIR is read from the environment; it is not set in this script.

config_path=$1
train_output_path=$2
ckpt_name=$3

# A stage runs when its index lies within [stage, stop_stage].
stage=0
stop_stage=0

# Invoke synthesize.py for one vocoder.
#   $1 vocoder name (--voc)
#   $2 vocoder config (--voc_config)
#   $3 vocoder checkpoint (--voc_ckpt)
#   $4 vocoder statistics (--voc_stat)
# All expansions are quoted so paths with whitespace are passed intact.
run_synthesize() {
    local voc=$1 voc_config=$2 voc_ckpt=$3 voc_stat=$4
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc="${voc}" \
        --voc_config="${voc_config}" \
        --voc_ckpt="${voc_ckpt}" \
        --voc_stat="${voc_stat}" \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nmlu=1
}

# for more GAN Vocoders
# multi band melgan
if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    run_synthesize mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy
fi

# style melgan
if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
    run_synthesize style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy
fi

# hifigan
if [[ ${stage} -le 2 && ${stop_stage} -ge 2 ]]; then
    echo "in hifigan syn"
    run_synthesize hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy
fi

# wavernn
if [[ ${stage} -le 3 && ${stop_stage} -ge 3 ]]; then
    echo "in wavernn syn"
    run_synthesize wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy
fi
|
@ -0,0 +1,110 @@
|
||||
#!/bin/bash
# Runs synthesize.py on dump/test/norm/metadata.jsonl with the
# speedyspeech_csmsc acoustic model and one of several vocoders, requesting
# `--ngpu=0 --nnpu=1`.
#
# Usage: <this script> <config_path> <train_output_path> <ckpt_name>
#   config_path        passed as --am_config
#   train_output_path  experiment directory; the acoustic-model checkpoint is
#                      <train_output_path>/checkpoints/<ckpt_name> and
#                      --output_dir is <train_output_path>/test
#   ckpt_name          acoustic-model checkpoint file name
# BIN_DIR is read from the environment; it is not set in this script.

config_path=$1
train_output_path=$2
ckpt_name=$3

# A stage runs when its index lies within [stage, stop_stage].
stage=0
stop_stage=0

# Invoke synthesize.py for one vocoder.
#   $1 vocoder name (--voc)
#   $2 vocoder config (--voc_config)
#   $3 vocoder checkpoint (--voc_ckpt)
#   $4 vocoder statistics (--voc_stat)
# All expansions are quoted so paths with whitespace are passed intact.
run_synthesize() {
    local voc=$1 voc_config=$2 voc_ckpt=$3 voc_stat=$4
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc="${voc}" \
        --voc_config="${voc_config}" \
        --voc_ckpt="${voc_ckpt}" \
        --voc_stat="${voc_stat}" \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nnpu=1
}

# pwgan
if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    run_synthesize pwgan_csmsc \
        pwg_baker_ckpt_0.4/pwg_default.yaml \
        pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        pwg_baker_ckpt_0.4/pwg_stats.npy
fi

# for more GAN Vocoders
# multi band melgan
if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
    run_synthesize mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy
fi

# style melgan
if [[ ${stage} -le 2 && ${stop_stage} -ge 2 ]]; then
    run_synthesize style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy
fi

# hifigan
if [[ ${stage} -le 3 && ${stop_stage} -ge 3 ]]; then
    echo "in hifigan syn"
    run_synthesize hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy
fi

# wavernn
if [[ ${stage} -le 4 && ${stop_stage} -ge 4 ]]; then
    echo "in wavernn syn"
    run_synthesize wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy
fi
|
@ -0,0 +1,16 @@
|
||||
|
||||
#!/bin/bash
# Launches ${BIN_DIR}/train.py on the normalized train/dev metadata under
# dump/, with `--ngpu=0 --nmlu=2`.
#
# Usage: <this script> <config_path> <train_output_path>
#   config_path        passed as --config
#   train_output_path  passed as --output-dir
# BIN_DIR is read from the environment; it is not set in this script.

config_path=$1
train_output_path=$2

# export MLU_VISIBLE_DEVICES=8

# NOTE(review): --nmlu=2 requests two devices; confirm this matches the device
# list the caller exports (e.g. FLAGS_selected_mlus).
# Expansions are quoted so paths containing whitespace are passed intact.
python "${BIN_DIR}/train.py" \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --config="${config_path}" \
    --output-dir="${train_output_path}" \
    --ngpu=0 \
    --nmlu=2 \
    --phones-dict=dump/phone_id_map.txt \
    --tones-dict=dump/tone_id_map.txt \
    --use-relative-path=True
|
@ -0,0 +1,16 @@
|
||||
|
||||
#!/bin/bash
# Launches ${BIN_DIR}/train.py on the normalized train/dev metadata under
# dump/, with `--ngpu=0 --nnpu=1`.
#
# Usage: <this script> <config_path> <train_output_path>
#   config_path        passed as --config
#   train_output_path  passed as --output-dir
# BIN_DIR is read from the environment; it is not set in this script.

config_path=$1
train_output_path=$2

# Expansions are quoted so paths containing whitespace are passed intact.
python "${BIN_DIR}/train.py" \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --config="${config_path}" \
    --output-dir="${train_output_path}" \
    --ngpu=0 \
    --nnpu=1 \
    --phones-dict=dump/phone_id_map.txt \
    --tones-dict=dump/tone_id_map.txt \
    --use-relative-path=True
|
@ -0,0 +1,76 @@
|
||||
#!/bin/bash
# Staged pipeline driver: preprocess, train, synthesize, export and predict.
# Stages whose index lies within [stage, stop_stage] are executed in order.
# Stages 1-4 call the *_mlu.sh helpers with FLAGS_selected_mlus set to ${mlus}.

set -e
source path.sh
export CUSTOM_DEVICE_BLACK_LIST=elementwise_max

mlus=0
# Exported as CUDA_VISIBLE_DEVICES in stages 8 and 9. This variable was never
# assigned before, so those stages silently exported an empty device list.
gpus=0
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_30600.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    # prepare data
    ./local/preprocess.sh "${conf_path}" || exit -1
fi

if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    FLAGS_selected_mlus=${mlus} ./local/train_mlu.sh "${conf_path}" "${train_output_path}" || exit -1
fi

if [[ ${stage} -le 2 && ${stop_stage} -ge 2 ]]; then
    # synthesize, vocoder is pwgan by default
    # NOTE(review): confirm the default vocoder against local/synthesize_mlu.sh
    FLAGS_selected_mlus=${mlus} ./local/synthesize_mlu.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [[ ${stage} -le 3 && ${stop_stage} -ge 3 ]]; then
    # synthesize_e2e, vocoder is pwgan by default
    # NOTE(review): confirm the default vocoder against local/synthesize_e2e_mlu.sh
    FLAGS_selected_mlus=${mlus} ./local/synthesize_e2e_mlu.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [[ ${stage} -le 4 && ${stop_stage} -ge 4 ]]; then
    # inference with static model
    FLAGS_selected_mlus=${mlus} ./local/inference_mlu.sh "${train_output_path}" || exit -1
fi

# paddle2onnx, please make sure the static models are in ${train_output_path}/inference first
# we have only tested the following models so far
if [[ ${stage} -le 5 && ${stop_stage} -ge 5 ]]; then
    # install paddle2onnx
    pip install paddle2onnx --upgrade
    ./local/paddle2onnx.sh "${train_output_path}" inference inference_onnx speedyspeech_csmsc
    # considering the balance between speed and quality, we recommend that you use hifigan as vocoder
    ./local/paddle2onnx.sh "${train_output_path}" inference inference_onnx pwgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx mb_melgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx hifigan_csmsc
fi

# inference with onnxruntime
if [[ ${stage} -le 6 && ${stop_stage} -ge 6 ]]; then
    ./local/ort_predict.sh "${train_output_path}"
fi

# must run after stage 3 (which stage generated static models)
if [[ ${stage} -le 7 && ${stop_stage} -ge 7 ]]; then
    ./local/export2lite.sh "${train_output_path}" inference pdlite speedyspeech_csmsc x86
    ./local/export2lite.sh "${train_output_path}" inference pdlite pwgan_csmsc x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite mb_melgan_csmsc x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite hifigan_csmsc x86
fi

if [[ ${stage} -le 8 && ${stop_stage} -ge 8 ]]; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/lite_predict.sh "${train_output_path}" || exit -1
fi

# PTQ_static
if [[ ${stage} -le 9 && ${stop_stage} -ge 9 ]]; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/PTQ_static.sh "${train_output_path}" speedyspeech_csmsc || exit -1
fi
|
@ -0,0 +1,42 @@
|
||||
#!/bin/bash
# Staged pipeline driver: preprocess, train, synthesize and static inference.
# Stages whose index lies within [stage, stop_stage] are executed in order.
# Stages 1-4 call the *_npu.sh helpers with FLAGS_selected_npus set to ${npus}.

set -e
source path.sh

npus=0
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_76.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run_npu.sh --stage 0 --stop-stage 0`
# (NOTE(review): the comment previously named run_xpu.sh; confirm this file's name)
# this can not be mixed use with `$1`, `$2` ...
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    # prepare data
    ./local/preprocess.sh "${conf_path}" || exit -1
fi

if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    FLAGS_selected_npus=${npus} ./local/train_npu.sh "${conf_path}" "${train_output_path}" || exit -1
fi

if [[ ${stage} -le 2 && ${stop_stage} -ge 2 ]]; then
    # synthesize, vocoder is pwgan by default
    FLAGS_selected_npus=${npus} ./local/synthesize_npu.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [[ ${stage} -le 3 && ${stop_stage} -ge 3 ]]; then
    # synthesize_e2e, vocoder is pwgan by default
    FLAGS_selected_npus=${npus} ./local/synthesize_e2e_npu.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [[ ${stage} -le 4 && ${stop_stage} -ge 4 ]]; then
    # inference with static model
    FLAGS_selected_npus=${npus} ./local/inference_npu.sh "${train_output_path}" || exit -1
fi
|
@ -0,0 +1,31 @@
|
||||
# Runs the asr, tts, vector and cli test suites one after another.
# Must be started from the directory that contains the `asr`, `tts`, `vector`
# and `cli` sub-directories. `set -ex` traces every command and aborts on the
# first one that fails.
function main(){
    set -ex
    # Declared separately from the assignment so a failing `pwd` is not masked
    # by the exit status of `local`.
    local speech_ci_path
    speech_ci_path=$(pwd)

    echo "Start asr"
    # Quoted so a working directory containing whitespace still resolves.
    cd "${speech_ci_path}/asr"
    bash deepspeech2_online_model_test.sh
    python error_rate_test.py
    python mask_test.py
    python reverse_pad_list.py
    echo "End asr"

    echo "Start TTS"
    cd "${speech_ci_path}/tts"
    python test_data_table.py
    python test_enfrontend.py
    python test_mixfrontend.py
    echo "End TTS"

    echo "Start Vector"
    cd "${speech_ci_path}/vector"
    python test_augment.py
    echo "End Vector"

    echo "Start cli"
    cd "${speech_ci_path}/cli"
    bash test_cli.sh
    echo "End cli"
}

main
|
@ -0,0 +1,29 @@
|
||||
# test CLI 测试文档
|
||||
|
||||
该文档为 CLI 测试说明,该测试目前覆盖大部分 paddlespeech 中的 CLI 推理。该 CI 建立后用于快速验证修复是否正确。
|
||||
|
||||
# 测试流程
|
||||
## 1. 环境安装
|
||||
|
||||
CI 重建时在已有通过版本 paddlepaddle-gpu==2.5.1, paddlespeech==develop 下运行。
|
||||
|
||||
CI 重建后在 paddlepaddle-gpu==develop, paddlespeech==develop 下运行。
|
||||
|
||||
### 其他相关依赖
|
||||
|
||||
gcc >= 4.8.5,
|
||||
python >= 3.8
|
||||
|
||||
## 2. 功能测试
|
||||
|
||||
在 repo 的 tests/unit/cli 中运行:
|
||||
|
||||
```shell
|
||||
|
||||
source path.sh
|
||||
bash test_cli.sh
|
||||
|
||||
```
|
||||
## 3. 预期结果
|
||||
|
||||
输出 "Test success",且运行过程中无报错或 Error 即为成功。
|
@ -0,0 +1,4 @@
|
||||
FROM registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.2-cudnn8.2-trt8.0-gcc82
# Refresh the package index and install libsndfile1 in the same layer, so a
# cached `apt-get update` layer is never reused with a later install step.
RUN apt-get update -y && apt-get -y install libsndfile1
RUN pip3.8 install pytest-runner
|
@ -0,0 +1,54 @@
|
||||
# Code-style gate: runs pre-commit on the files that differ from ${BRANCH},
# prints any reformatting diff and exits with pre-commit's status.
# Requires bash (uses `[[ ]]`). BRANCH is read from the environment.

set +x

# use pre-commit 2.17
if ! [[ $(pre-commit --version) == *"2.17.0"* ]]; then
    # Discard pip's stdout. The previous target `nul` is the Windows null
    # device name and created a stray file called `nul` on Linux.
    pip install pre-commit==2.17.0 1>/dev/null
fi

# Install clang-format before git commit to avoid repeat installation due to
# pre-commit multi-thread running.
# NOTE(review): VERSION is not referenced anywhere in this script. The unused
# `clang-format -version` probe that followed it has been removed.
readonly VERSION="13.0.0"
if ! [[ $(python -V 2>&1 | awk '{print $2}' | awk -F '.' '{print $1$2}') -ge 36 ]]; then
    echo "clang-format installation by pip need python version great equal 3.6,
please change the default python to higher version."
    exit 1
fi

# ${BRANCH} is deliberately unquoted: when it is unset or empty no argument is
# passed and git falls back to a plain `git diff`.
# shellcheck disable=SC2086
diff_files=$(git diff --name-only --diff-filter=ACMR ${BRANCH})
num_diff_files=$(echo "$diff_files" | wc -l)
echo -e "diff files between pr and ${BRANCH}:\n${diff_files}"

echo "Checking code style by pre-commit ..."
# ${diff_files} is deliberately unquoted so every listed file becomes its own
# argument.
# shellcheck disable=SC2086
pre-commit run --files ${diff_files}
check_error=$?

# A non-empty working-tree diff at this point means the hooks rewrote files.
if [[ -n "$(git diff)" ]]; then
    echo -e '\n************************************************************************************'
    echo -e "These files have been formatted by code format hook. You should use pre-commit to format them before git push."
    echo -e '************************************************************************************\n'
    git diff 2>&1
fi

echo -e '\n************************************************************************************'
if [[ ${check_error} -ne 0 ]]; then
    echo "Your PR code style check failed."
    echo "Please install pre-commit locally and set up git hook scripts:"
    echo ""
    echo " pip install pre-commit==2.17.0"
    echo " pre-commit install"
    echo ""
    # Only print the reproduction command when the file list is short enough
    # to be readable.
    if [[ $num_diff_files -le 100 ]]; then
        echo "Then, run pre-commit to check codestyle issues in your PR:"
        echo ""
        # Unquoted on purpose: word splitting joins the file names with
        # single spaces.
        # shellcheck disable=SC2046,SC2086
        echo " pre-commit run --files" $(echo ${diff_files} | tr "\n" " ")
        echo ""
    fi
    echo "For more information, please refer to our codestyle check guide:"
    echo "https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/dev_guides/git_guides/codestyle_check_guide_cn.html"
else
    echo "Your PR code style check passed."
fi
echo -e '************************************************************************************\n'

exit ${check_error}
|
Loading…
Reference in new issue