fix benchmark and chain, add parse_options in run.sh, move tacotron2_ge2e into voice_cloning

pull/932/head
TianYuan 3 years ago
parent 2e9d9dc9a7
commit 20226b4fdd

@ -12,6 +12,11 @@ conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_482.pdz
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
./local/preprocess.sh ${conf_path} || exit -1

@ -9,7 +9,7 @@ alignment=$3
ge2e_ckpt_path=$4
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
python3 ${BIN_DIR}/../ge2e/inference.py \
python3 ${BIN_DIR}/../../ge2e/inference.py \
--input=${input} \
--output=${preprocess_path}/embed \
--device="gpu" \

@ -9,5 +9,5 @@ export PYTHONDONTWRITEBYTECODE=1
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
MODEL=tacotron2_ge2e
MODEL=voice_cloning/tacotron2_ge2e
export BIN_DIR=${MAIN_ROOT}/parakeet/exps/${MODEL}

@ -23,6 +23,10 @@ waveflow_params_path=./waveflow_ljspeech_ckpt_0.3/step-2000000.pdparams
vc_input=ref_audio
vc_output=syn_audio
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data

@ -11,6 +11,11 @@ conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_76.pdz
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
./local/preprocess.sh ${conf_path} || exit -1

@ -11,6 +11,11 @@ conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_153.pdz
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
bash ./local/preprocess.sh ${conf_path} || exit -1

@ -11,6 +11,11 @@ conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_5000.pdz
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
./local/preprocess.sh ${conf_path} || exit -1

@ -11,6 +11,11 @@ preprocess_path=preprocessed_ljspeech
train_output_path=output
ckpt_name=step-35000
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
./local/preprocess.sh ${preprocess_path} || exit -1

@ -11,6 +11,11 @@ conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_403.pdz
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
./local/preprocess.sh ${conf_path} || exit -1

@ -11,6 +11,11 @@ conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_201.pdz
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
./local/preprocess.sh ${conf_path} || exit -1

@ -13,6 +13,11 @@ train_output_path=output
input_mel_path=../tts0/output/test
ckpt_name=step-10000
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
./local/preprocess.sh ${preprocess_path} || exit -1

@ -11,6 +11,11 @@ conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_5000.pdz
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
./local/preprocess.sh ${conf_path} || exit -1

@ -15,6 +15,10 @@ infer_input=infer_input
infer_output=infer_output
ckpt_name=step-10000
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data

@ -11,6 +11,11 @@ conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_331.pdz
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
./local/preprocess.sh ${conf_path} || exit -1

@ -11,6 +11,11 @@ conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_5000.pdz
# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be used together with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
./local/preprocess.sh ${conf_path} || exit -1

@ -0,0 +1,13 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@ -19,8 +19,8 @@ from paddle.io import Dataset
from parakeet.data import batch_spec
from parakeet.data import batch_text_id
from parakeet.exps.tacotron2_ge2e.preprocess_transcription import _phones
from parakeet.exps.tacotron2_ge2e.preprocess_transcription import _tones
from parakeet.exps.voice_cloning.tacotron2_ge2e.preprocess_transcription import _phones
from parakeet.exps.voice_cloning.tacotron2_ge2e.preprocess_transcription import _tones
from parakeet.frontend import Vocab
voc_phones = Vocab(sorted(list(_phones)))

@ -17,7 +17,7 @@ from typing import Tuple
from pypinyin import lazy_pinyin
from pypinyin import Style
from parakeet.exps.tacotron2_ge2e.preprocess_transcription import split_syllable
from parakeet.exps.voice_cloning.tacotron2_ge2e.preprocess_transcription import split_syllable
def convert_to_pinyin(text: str) -> List[str]:

@ -22,7 +22,7 @@ import tqdm
from parakeet.audio import AudioProcessor
from parakeet.audio.spec_normalizer import LogMagnitude
from parakeet.audio.spec_normalizer import NormalizerBase
from parakeet.exps.tacotron2_ge2e.config import get_cfg_defaults
from parakeet.exps.voice_cloning.tacotron2_ge2e.config import get_cfg_defaults
def extract_mel(fname: Path,

@ -23,9 +23,9 @@ from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from parakeet.data import dataset
from parakeet.exps.tacotron2_ge2e.aishell3 import AiShell3
from parakeet.exps.tacotron2_ge2e.aishell3 import collate_aishell3_examples
from parakeet.exps.tacotron2_ge2e.config import get_cfg_defaults
from parakeet.exps.voice_cloning.tacotron2_ge2e.aishell3 import AiShell3
from parakeet.exps.voice_cloning.tacotron2_ge2e.aishell3 import collate_aishell3_examples
from parakeet.exps.voice_cloning.tacotron2_ge2e.config import get_cfg_defaults
from parakeet.models.tacotron2 import Tacotron2
from parakeet.models.tacotron2 import Tacotron2Loss
from parakeet.training.cli import default_argument_parser

@ -21,9 +21,9 @@ import soundfile as sf
from matplotlib import pyplot as plt
from parakeet.exps.ge2e.audio_processor import SpeakerVerificationPreprocessor
from parakeet.exps.tacotron2_ge2e.aishell3 import voc_phones
from parakeet.exps.tacotron2_ge2e.aishell3 import voc_tones
from parakeet.exps.tacotron2_ge2e.chinese_g2p import convert_sentence
from parakeet.exps.voice_cloning.tacotron2_ge2e.aishell3 import voc_phones
from parakeet.exps.voice_cloning.tacotron2_ge2e.aishell3 import voc_tones
from parakeet.exps.voice_cloning.tacotron2_ge2e.chinese_g2p import convert_sentence
from parakeet.models.lstm_speaker_encoder import LSTMSpeakerEncoder
from parakeet.models.tacotron2 import Tacotron2
from parakeet.models.waveflow import ConditionalWaveFlow

@ -4,8 +4,8 @@
```
即可运行.
执行逻辑:
1. cd 到 ../../../ (也就是 Parakeet 目录)
1. cd 到 ../../../ (也就是 Deepspeech 目录)
2. 安装 parakeet 所需依赖
3. 从 bos 下载数据集并解压缩
4. 预处理数据集为训练 pwg 所需格式,保存到 Parakeet/dump 文件夹底下
4. 预处理数据集为训练 pwg 所需格式,保存到 Deepspeech/dump 文件夹底下
5. 按照不同的参数执行 run_benchmark.sh 脚本

@ -10,6 +10,9 @@ cd ../../../
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
sudo apt-get install libsndfile1
pip install -e .
pushd examples/csmsc/voc1
source path.sh
popd
fi
# 2 拷贝该模型需要数据、预训练模型
# 下载 baker 数据集到 home 目录下并解压缩到 home 目录下
@ -22,15 +25,14 @@ fi
# 数据预处理
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
python examples/GANVocoder/preprocess.py --rootdir=BZNSYP/ --dumpdir=dump --num-cpu=20 --cut-sil=True --dur-file=durations.txt --config=examples/GANVocoder/parallelwave_gan/baker/conf/default.yaml
python utils/compute_statistics.py --metadata=dump/train/raw/metadata.jsonl --field-name="feats"
python examples/GANVocoder/normalize.py --metadata=dump/train/raw/metadata.jsonl --dumpdir=dump/train/norm --stats=dump/train/feats_stats.npy
python examples/GANVocoder/normalize.py --metadata=dump/dev/raw/metadata.jsonl --dumpdir=dump/dev/norm --stats=dump/train/feats_stats.npy
python examples/GANVocoder/normalize.py --metadata=dump/test/raw/metadata.jsonl --dumpdir=dump/test/norm --stats=dump/train/feats_stats.npy
python3 parakeet/exps/gan_vocoder/preprocess.py --rootdir=BZNSYP/ --dumpdir=dump --num-cpu=20 --cut-sil=True --dur-file=durations.txt --config=examples/csmsc/voc1/conf/default.yaml
python3 utils/compute_statistics.py --metadata=dump/train/raw/metadata.jsonl --field-name="feats"
python3 parakeet/exps/gan_vocoder/normalize.py --metadata=dump/train/raw/metadata.jsonl --dumpdir=dump/train/norm --stats=dump/train/feats_stats.npy
python3 parakeet/exps/gan_vocoder/normalize.py --metadata=dump/dev/raw/metadata.jsonl --dumpdir=dump/dev/norm --stats=dump/train/feats_stats.npy
python3 parakeet/exps/gan_vocoder/normalize.py --metadata=dump/test/raw/metadata.jsonl --dumpdir=dump/test/norm --stats=dump/train/feats_stats.npy
fi
# 3 批量运行如不方便批量12需放到单个模型中
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
model_mode_list=(pwg)
fp_item_list=(fp32)
# 满 bs 是 26
@ -40,7 +42,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
for bs_item in ${bs_item_list[@]}; do
echo "index is speed, 1gpus, begin, ${model_name}"
run_mode=sp
CUDA_VISIBLE_DEVICES=0 bash tests/benchmark/PWGAN/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 100 ${model_mode} # (5min)
CUDA_VISIBLE_DEVICES=0 bash tests/benchmark/pwgan/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 100 ${model_mode} # (5min)
sleep 60
echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
run_mode=mp

@ -24,13 +24,13 @@ function _train(){
--max-iter=${max_iter}
--train-metadata=dump/train/norm/metadata.jsonl \
--dev-metadata=dump/dev/norm/metadata.jsonl \
--config=examples/GANVocoder/parallelwave_gan/baker/conf/default.yaml \
--config=examples/csmsc/voc1/conf/default.yaml \
--output-dir=exp/default \
--run-benchmark=true"
case ${run_mode} in
sp) train_cmd="python3 examples/GANVocoder/parallelwave_gan/train.py --nprocs=1 ${train_cmd}" ;;
mp) train_cmd="python3 examples/GANVocoder/parallelwave_gan/train.py --nprocs=8 ${train_cmd}"
sp) train_cmd="python3 parakeet/exps/gan_vocoder/parallelwave_gan/train.py --nprocs=1 ${train_cmd}" ;;
mp) train_cmd="python3 parakeet/exps/gan_vocoder/parallelwave_gan/train.py --nprocs=8 ${train_cmd}"
log_parse_file="mylog/workerlog.0" ;;
*) echo "choose run_mode(sp or mp)"; exit 1;
esac

@ -13,7 +13,7 @@ null:null
null:null
##
trainer:norm_train
norm_train:../../examples/speedyspeech/train.py --train-metadata=train_data/mini_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/mini_BZNSYP/dev/norm/metadata.jsonl --config=../../examples/speedyspeech/baker/conf/default.yaml --batch_size=32 --max_epoch=20 --num_snapshots=10 --output-dir=exp/default --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt --use-relative-path=True
norm_train:../../../parakeet/exps/speedyspeech/train.py --train-metadata=train_data/mini_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/mini_BZNSYP/dev/norm/metadata.jsonl --config=../../../examples/csmsc/tts2/conf/default.yaml --batch_size=32 --max_epoch=20 --num_snapshots=10 --output-dir=exp/default --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt --use-relative-path=True
null:null
null:null
null:null
@ -21,7 +21,7 @@ null:null
null:null
##
===========================eval_params===========================
eval:../../examples/speedyspeech/baker/synthesize_e2e.py --speedyspeech-config=../../examples/speedyspeech/baker/conf/default.yaml --speedyspeech-checkpoint=exp/default/checkpoints/snapshot_iter_20.pdz --speedyspeech-stat=train_data/mini_BZNSYP/train/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../examples/speedyspeech/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt
eval:../../../parakeet/exps/speedyspeech/synthesize_e2e.py --speedyspeech-config=../../../examples/csmsc/tts2/conf/default.yaml --speedyspeech-checkpoint=exp/default/checkpoints/snapshot_iter_20.pdz --speedyspeech-stat=train_data/mini_BZNSYP/train/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../../parakeet/exps/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt
null:null
##
===========================infer_params===========================
@ -37,7 +37,7 @@ null:null
null:null
null:null
null:null
inference:../../examples/speedyspeech/baker/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../examples/speedyspeech/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt
inference:../../../parakeet/exps/speedyspeech/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../../parakeet/exps/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt
null:null
null:null
null:null

@ -13,7 +13,7 @@ null:null
null:null
##
trainer:norm_train
norm_train:../../examples/speedyspeech/train.py --train-metadata=train_data/mini_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/mini_BZNSYP/dev/norm/metadata.jsonl --config=../../examples/speedyspeech/baker/conf/default.yaml --batch_size=32 --max_epoch=10 --num_snapshots=10 --output-dir=exp/default --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt --use-relative-path=True
norm_train:../../../parakeet/exps/speedyspeech/train.py --train-metadata=train_data/mini_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/mini_BZNSYP/dev/norm/metadata.jsonl --config=../../../examples/csmsc/tts2/conf/default.yaml --batch_size=32 --max_epoch=10 --num_snapshots=10 --output-dir=exp/default --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt --use-relative-path=True
null:null
null:null
null:null
@ -21,7 +21,7 @@ null:null
null:null
##
===========================eval_params===========================
eval:../../examples/speedyspeech/baker/synthesize_e2e.py --speedyspeech-config=../../examples/speedyspeech/baker/conf/default.yaml --speedyspeech-checkpoint=exp/default/checkpoints/snapshot_iter_30.pdz --speedyspeech-stat=train_data/mini_BZNSYP/train/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../examples/speedyspeech/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt
eval:../../../parakeet/exps/speedyspeech/synthesize_e2e.py --speedyspeech-config=../../../examples/csmsc/tts2/conf/default.yaml --speedyspeech-checkpoint=exp/default/checkpoints/snapshot_iter_30.pdz --speedyspeech-stat=train_data/mini_BZNSYP/train/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../../parakeet/exps/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt
null:null
##
===========================infer_params===========================
@ -37,7 +37,7 @@ null:null
null:null
null:null
null:null
inference:../../examples/speedyspeech/baker/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../examples/speedyspeech/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt
inference:../../../parakeet/exps/speedyspeech/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../../parakeet/exps/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt
--use_gpu:True
null:null
null:null

@ -13,7 +13,7 @@ null:null
null:null
##
trainer:norm_train
norm_train:../../examples/speedyspeech/train.py --train-metadata=train_data/processed_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/processed_BZNSYP/dev/norm/metadata.jsonl --config=../../examples/speedyspeech/baker/conf/default.yaml --output-dir=exp/whole --phones-dict=train_data/processed_BZNSYP/phone_id_map.txt --tones-dict=train_data/processed_BZNSYP/tone_id_map.txt --use-relative-path=True
norm_train:../../../parakeet/exps/speedyspeech/train.py --train-metadata=train_data/processed_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/processed_BZNSYP/dev/norm/metadata.jsonl --config=../../../examples/csmsc/tts2/conf/default.yaml --output-dir=exp/whole --phones-dict=train_data/processed_BZNSYP/phone_id_map.txt --tones-dict=train_data/processed_BZNSYP/tone_id_map.txt --use-relative-path=True
null:null
null:null
null:null
@ -21,7 +21,7 @@ null:null
null:null
##
===========================eval_params===========================
eval:../../examples/speedyspeech/baker/synthesize_e2e.py --speedyspeech-config=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/default.yaml --speedyspeech-checkpoint=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/snapshot_iter_11400.pdz --speedyspeech-stat=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../examples/speedyspeech/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt
eval:../../../parakeet/exps/speedyspeech/synthesize_e2e.py --speedyspeech-config=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/default.yaml --speedyspeech-checkpoint=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/snapshot_iter_11400.pdz --speedyspeech-stat=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../../parakeet/exps/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt
null:null
##
===========================infer_params===========================
@ -37,7 +37,7 @@ null:null
null:null
null:null
null:null
inference:../../examples/speedyspeech/baker/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../examples/speedyspeech/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt
inference:../../../parakeet/exps/speedyspeech/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../../parakeet/exps/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt
null:null
null:null
null:null

@ -13,7 +13,7 @@ null:null
null:null
##
trainer:norm_train
norm_train:../../examples/speedyspeech/train.py --train-metadata=train_data/processed_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/processed_BZNSYP/dev/norm/metadata.jsonl --config=../../examples/speedyspeech/baker/conf/default.yaml --output-dir=exp/whole --phones-dict=train_data/processed_BZNSYP/phone_id_map.txt --tones-dict=train_data/processed_BZNSYP/tone_id_map.txt --use-relative-path=True
norm_train:../../../parakeet/exps/speedyspeech/train.py --train-metadata=train_data/processed_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/processed_BZNSYP/dev/norm/metadata.jsonl --config=../../../examples/csmsc/tts2/conf/default.yaml --output-dir=exp/whole --phones-dict=train_data/processed_BZNSYP/phone_id_map.txt --tones-dict=train_data/processed_BZNSYP/tone_id_map.txt --use-relative-path=True
null:null
null:null
null:null
@ -21,7 +21,7 @@ null:null
null:null
##
===========================eval_params===========================
eval:../../examples/speedyspeech/baker/synthesize_e2e.py --speedyspeech-config=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/default.yaml --speedyspeech-checkpoint=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/snapshot_iter_11400.pdz --speedyspeech-stat=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../examples/speedyspeech/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt
eval:../../../parakeet/exps/speedyspeech/synthesize_e2e.py --speedyspeech-config=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/default.yaml --speedyspeech-checkpoint=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/snapshot_iter_11400.pdz --speedyspeech-stat=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../../parakeet/exps/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt
null:null
##
===========================infer_params===========================
@ -37,7 +37,7 @@ null:null
null:null
null:null
null:null
inference:../../examples/speedyspeech/baker/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../examples/speedyspeech/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt
inference:../../../parakeet/exps/speedyspeech/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../../parakeet/exps/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt
null:null
null:null
null:null

Loading…
Cancel
Save