From b4c85b513144373c97e0a26d0f30c5ab0e1a0950 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Tue, 26 Oct 2021 09:07:15 +0000 Subject: [PATCH] fix benchmark and chain, add parse_options in run.sh, move tacotron2_ge2e into voice_cloning --- examples/aishell3/tts3/run.sh | 5 +++++ examples/aishell3/vc0/local/preprocess.sh | 2 +- examples/aishell3/vc0/path.sh | 2 +- examples/aishell3/vc0/run.sh | 4 ++++ examples/csmsc/tts2/run.sh | 5 +++++ examples/csmsc/tts3/run.sh | 5 +++++ examples/csmsc/voc1/run.sh | 5 +++++ examples/ljspeech/tts0/run.sh | 5 +++++ examples/ljspeech/tts1/run.sh | 5 +++++ examples/ljspeech/tts3/run.sh | 5 +++++ examples/ljspeech/voc0/run.sh | 5 +++++ examples/ljspeech/voc1/run.sh | 5 +++++ examples/other/ge2e/run.sh | 4 ++++ examples/vctk/tts3/run.sh | 5 +++++ examples/vctk/voc1/run.sh | 5 +++++ .../__init__.py | 0 .../voice_cloning/tacotron2_ge2e/__init__.py | 13 +++++++++++++ .../tacotron2_ge2e/aishell3.py | 4 ++-- .../tacotron2_ge2e/chinese_g2p.py | 2 +- .../{ => voice_cloning}/tacotron2_ge2e/config.py | 0 .../tacotron2_ge2e/extract_mel.py | 2 +- .../tacotron2_ge2e/lexicon.txt | 0 .../tacotron2_ge2e/preprocess_transcription.py | 0 .../tacotron2_ge2e/process_wav.py | 0 .../{ => voice_cloning}/tacotron2_ge2e/train.py | 6 +++--- .../tacotron2_ge2e/voice_cloning.py | 6 +++--- tests/benchmark/pwgan/README.md | 4 ++-- tests/benchmark/pwgan/run_all.sh | 16 +++++++++------- tests/benchmark/pwgan/run_benchmark.sh | 6 +++--- tests/chains/speedyspeech/lite_train_infer.sh | 0 tests/chains/speedyspeech/prepare.sh | 0 .../speedyspeech_params_lite_multi_gpu.txt | 6 +++--- .../speedyspeech_params_lite_single_gpu.txt | 6 +++--- .../speedyspeech_params_whole_multi_gpu.txt | 6 +++--- .../speedyspeech_params_whole_single_gpu.txt | 6 +++--- tests/chains/speedyspeech/test.sh | 0 tests/chains/speedyspeech/whole_train_infer.sh | 0 37 files changed, 114 insertions(+), 36 deletions(-) rename parakeet/exps/{tacotron2_ge2e => voice_cloning}/__init__.py (100%) create mode 100644 parakeet/exps/voice_cloning/tacotron2_ge2e/__init__.py rename parakeet/exps/{ => voice_cloning}/tacotron2_ge2e/aishell3.py (94%) rename parakeet/exps/{ => voice_cloning}/tacotron2_ge2e/chinese_g2p.py (93%) rename parakeet/exps/{ => voice_cloning}/tacotron2_ge2e/config.py (100%) rename parakeet/exps/{ => voice_cloning}/tacotron2_ge2e/extract_mel.py (97%) rename parakeet/exps/{ => voice_cloning}/tacotron2_ge2e/lexicon.txt (100%) rename parakeet/exps/{ => voice_cloning}/tacotron2_ge2e/preprocess_transcription.py (100%) rename parakeet/exps/{ => voice_cloning}/tacotron2_ge2e/process_wav.py (100%) rename parakeet/exps/{ => voice_cloning}/tacotron2_ge2e/train.py (97%) rename parakeet/exps/{ => voice_cloning}/tacotron2_ge2e/voice_cloning.py (96%) mode change 100644 => 100755 tests/chains/speedyspeech/lite_train_infer.sh mode change 100644 => 100755 tests/chains/speedyspeech/prepare.sh mode change 100644 => 100755 tests/chains/speedyspeech/test.sh mode change 100644 => 100755 tests/chains/speedyspeech/whole_train_infer.sh diff --git a/examples/aishell3/tts3/run.sh b/examples/aishell3/tts3/run.sh index a58fec5d3..656710763 100755 --- a/examples/aishell3/tts3/run.sh +++ b/examples/aishell3/tts3/run.sh @@ -12,6 +12,11 @@ conf_path=conf/default.yaml train_output_path=exp/default ckpt_name=snapshot_iter_482.pdz +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 + if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data ./local/preprocess.sh ${conf_path} || exit -1 diff --git a/examples/aishell3/vc0/local/preprocess.sh b/examples/aishell3/vc0/local/preprocess.sh index fae021307..87cfab32a 100755 --- a/examples/aishell3/vc0/local/preprocess.sh +++ b/examples/aishell3/vc0/local/preprocess.sh @@ -9,7 +9,7 @@ alignment=$3 ge2e_ckpt_path=$4 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - python3 ${BIN_DIR}/../ge2e/inference.py \ + python3 ${BIN_DIR}/../../ge2e/inference.py \ --input=${input} \ --output=${preprocess_path}/embed \ --device="gpu" \ diff --git a/examples/aishell3/vc0/path.sh b/examples/aishell3/vc0/path.sh index df2af8035..485d73bf7 100755 --- a/examples/aishell3/vc0/path.sh +++ b/examples/aishell3/vc0/path.sh @@ -9,5 +9,5 @@ export PYTHONDONTWRITEBYTECODE=1 export PYTHONIOENCODING=UTF-8 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} -MODEL=tacotron2_ge2e +MODEL=voice_cloning/tacotron2_ge2e export BIN_DIR=${MAIN_ROOT}/parakeet/exps/${MODEL} diff --git a/examples/aishell3/vc0/run.sh b/examples/aishell3/vc0/run.sh index dab9a5ceb..8d3da7813 100755 --- a/examples/aishell3/vc0/run.sh +++ b/examples/aishell3/vc0/run.sh @@ -23,6 +23,10 @@ waveflow_params_path=./waveflow_ljspeech_ckpt_0.3/step-2000000.pdparams vc_input=ref_audio vc_output=syn_audio +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data diff --git a/examples/csmsc/tts2/run.sh b/examples/csmsc/tts2/run.sh index 5d00a0700..200e81929 100755 --- a/examples/csmsc/tts2/run.sh +++ b/examples/csmsc/tts2/run.sh @@ -11,6 +11,11 @@ conf_path=conf/default.yaml train_output_path=exp/default ckpt_name=snapshot_iter_76.pdz +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 + if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data ./local/preprocess.sh ${conf_path} || exit -1 diff --git a/examples/csmsc/tts3/run.sh b/examples/csmsc/tts3/run.sh index 24e439924..f45ddab06 100755 --- a/examples/csmsc/tts3/run.sh +++ b/examples/csmsc/tts3/run.sh @@ -11,6 +11,11 @@ conf_path=conf/default.yaml train_output_path=exp/default ckpt_name=snapshot_iter_153.pdz +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 + if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data bash ./local/preprocess.sh ${conf_path} || exit -1 diff --git a/examples/csmsc/voc1/run.sh b/examples/csmsc/voc1/run.sh index 666a15120..163095439 100755 --- a/examples/csmsc/voc1/run.sh +++ b/examples/csmsc/voc1/run.sh @@ -11,6 +11,11 @@ conf_path=conf/default.yaml train_output_path=exp/default ckpt_name=snapshot_iter_5000.pdz +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 + if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data ./local/preprocess.sh ${conf_path} || exit -1 diff --git a/examples/ljspeech/tts0/run.sh b/examples/ljspeech/tts0/run.sh index 9907b97fd..1da80c962 100755 --- a/examples/ljspeech/tts0/run.sh +++ b/examples/ljspeech/tts0/run.sh @@ -11,6 +11,11 @@ preprocess_path=preprocessed_ljspeech train_output_path=output ckpt_name=step-35000 +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 + if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data ./local/preprocess.sh ${preprocess_path} || exit -1 diff --git a/examples/ljspeech/tts1/run.sh b/examples/ljspeech/tts1/run.sh index 7d6599061..6e7a60607 100755 --- a/examples/ljspeech/tts1/run.sh +++ b/examples/ljspeech/tts1/run.sh @@ -11,6 +11,11 @@ conf_path=conf/default.yaml train_output_path=exp/default ckpt_name=snapshot_iter_403.pdz +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 + if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data ./local/preprocess.sh ${conf_path} || exit -1 diff --git a/examples/ljspeech/tts3/run.sh b/examples/ljspeech/tts3/run.sh index 329ba124d..143debd2a 100755 --- a/examples/ljspeech/tts3/run.sh +++ b/examples/ljspeech/tts3/run.sh @@ -11,6 +11,11 @@ conf_path=conf/default.yaml train_output_path=exp/default ckpt_name=snapshot_iter_201.pdz +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 + if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data ./local/preprocess.sh ${conf_path} || exit -1 diff --git a/examples/ljspeech/voc0/run.sh b/examples/ljspeech/voc0/run.sh index aeb1c8d1a..a4f1ac389 100755 --- a/examples/ljspeech/voc0/run.sh +++ b/examples/ljspeech/voc0/run.sh @@ -13,6 +13,11 @@ train_output_path=output input_mel_path=../tts0/output/test ckpt_name=step-10000 +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 + if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data ./local/preprocess.sh ${preprocess_path} || exit -1 diff --git a/examples/ljspeech/voc1/run.sh b/examples/ljspeech/voc1/run.sh index 666a15120..163095439 100755 --- a/examples/ljspeech/voc1/run.sh +++ b/examples/ljspeech/voc1/run.sh @@ -11,6 +11,11 @@ conf_path=conf/default.yaml train_output_path=exp/default ckpt_name=snapshot_iter_5000.pdz +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 + if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data ./local/preprocess.sh ${conf_path} || exit -1 diff --git a/examples/other/ge2e/run.sh b/examples/other/ge2e/run.sh index 2a2db3eeb..d7954bd2f 100755 --- a/examples/other/ge2e/run.sh +++ b/examples/other/ge2e/run.sh @@ -15,6 +15,10 @@ infer_input=infer_input infer_output=infer_output ckpt_name=step-10000 +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data diff --git a/examples/vctk/tts3/run.sh b/examples/vctk/tts3/run.sh index 474d8e49a..0562ef3f4 100755 --- a/examples/vctk/tts3/run.sh +++ b/examples/vctk/tts3/run.sh @@ -11,6 +11,11 @@ conf_path=conf/default.yaml train_output_path=exp/default ckpt_name=snapshot_iter_331.pdz +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 + if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data ./local/preprocess.sh ${conf_path} || exit -1 diff --git a/examples/vctk/voc1/run.sh b/examples/vctk/voc1/run.sh index 71e2727c9..7d0fdb21e 100755 --- a/examples/vctk/voc1/run.sh +++ b/examples/vctk/voc1/run.sh @@ -11,6 +11,11 @@ conf_path=conf/default.yaml train_output_path=exp/default ckpt_name=snapshot_iter_5000.pdz +# with the following command, you can choice the stage range you want to run +# such as `./run.sh --stage 0 --stop-stage 0` +# this can not be mixed use with `$1`, `$2` ... +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 + if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data ./local/preprocess.sh ${conf_path} || exit -1 diff --git a/parakeet/exps/tacotron2_ge2e/__init__.py b/parakeet/exps/voice_cloning/__init__.py similarity index 100% rename from parakeet/exps/tacotron2_ge2e/__init__.py rename to parakeet/exps/voice_cloning/__init__.py diff --git a/parakeet/exps/voice_cloning/tacotron2_ge2e/__init__.py b/parakeet/exps/voice_cloning/tacotron2_ge2e/__init__.py new file mode 100644 index 000000000..abf198b97 --- /dev/null +++ b/parakeet/exps/voice_cloning/tacotron2_ge2e/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/parakeet/exps/tacotron2_ge2e/aishell3.py b/parakeet/exps/voice_cloning/tacotron2_ge2e/aishell3.py similarity index 94% rename from parakeet/exps/tacotron2_ge2e/aishell3.py rename to parakeet/exps/voice_cloning/tacotron2_ge2e/aishell3.py index 542573996..b697ecf36 100644 --- a/parakeet/exps/tacotron2_ge2e/aishell3.py +++ b/parakeet/exps/voice_cloning/tacotron2_ge2e/aishell3.py @@ -19,8 +19,8 @@ from paddle.io import Dataset from parakeet.data import batch_spec from parakeet.data import batch_text_id -from parakeet.exps.tacotron2_ge2e.preprocess_transcription import _phones -from parakeet.exps.tacotron2_ge2e.preprocess_transcription import _tones +from parakeet.exps.voice_cloning.tacotron2_ge2e.preprocess_transcription import _phones +from parakeet.exps.voice_cloning.tacotron2_ge2e.preprocess_transcription import _tones from parakeet.frontend import Vocab voc_phones = Vocab(sorted(list(_phones))) diff --git a/parakeet/exps/tacotron2_ge2e/chinese_g2p.py b/parakeet/exps/voice_cloning/tacotron2_ge2e/chinese_g2p.py similarity index 93% rename from parakeet/exps/tacotron2_ge2e/chinese_g2p.py rename to parakeet/exps/voice_cloning/tacotron2_ge2e/chinese_g2p.py index 6cb86d9db..f975522f1 100644 --- a/parakeet/exps/tacotron2_ge2e/chinese_g2p.py +++ b/parakeet/exps/voice_cloning/tacotron2_ge2e/chinese_g2p.py @@ -17,7 +17,7 @@ from typing import Tuple from pypinyin import lazy_pinyin from pypinyin import Style -from parakeet.exps.tacotron2_ge2e.preprocess_transcription import split_syllable +from parakeet.exps.voice_cloning.tacotron2_ge2e.preprocess_transcription import split_syllable def convert_to_pinyin(text: str) -> List[str]: diff --git a/parakeet/exps/tacotron2_ge2e/config.py b/parakeet/exps/voice_cloning/tacotron2_ge2e/config.py similarity index 100% rename from parakeet/exps/tacotron2_ge2e/config.py rename to parakeet/exps/voice_cloning/tacotron2_ge2e/config.py diff --git a/parakeet/exps/tacotron2_ge2e/extract_mel.py b/parakeet/exps/voice_cloning/tacotron2_ge2e/extract_mel.py similarity index 97% rename from parakeet/exps/tacotron2_ge2e/extract_mel.py rename to parakeet/exps/voice_cloning/tacotron2_ge2e/extract_mel.py index e32f3e3bb..53daa3fcd 100644 --- a/parakeet/exps/tacotron2_ge2e/extract_mel.py +++ b/parakeet/exps/voice_cloning/tacotron2_ge2e/extract_mel.py @@ -22,7 +22,7 @@ import tqdm from parakeet.audio import AudioProcessor from parakeet.audio.spec_normalizer import LogMagnitude from parakeet.audio.spec_normalizer import NormalizerBase -from parakeet.exps.tacotron2_ge2e.config import get_cfg_defaults +from parakeet.exps.voice_cloning.tacotron2_ge2e.config import get_cfg_defaults def extract_mel(fname: Path, diff --git a/parakeet/exps/tacotron2_ge2e/lexicon.txt b/parakeet/exps/voice_cloning/tacotron2_ge2e/lexicon.txt similarity index 100% rename from parakeet/exps/tacotron2_ge2e/lexicon.txt rename to parakeet/exps/voice_cloning/tacotron2_ge2e/lexicon.txt diff --git a/parakeet/exps/tacotron2_ge2e/preprocess_transcription.py b/parakeet/exps/voice_cloning/tacotron2_ge2e/preprocess_transcription.py similarity index 100% rename from parakeet/exps/tacotron2_ge2e/preprocess_transcription.py rename to parakeet/exps/voice_cloning/tacotron2_ge2e/preprocess_transcription.py diff --git a/parakeet/exps/tacotron2_ge2e/process_wav.py b/parakeet/exps/voice_cloning/tacotron2_ge2e/process_wav.py similarity index 100% rename from parakeet/exps/tacotron2_ge2e/process_wav.py rename to parakeet/exps/voice_cloning/tacotron2_ge2e/process_wav.py diff --git a/parakeet/exps/tacotron2_ge2e/train.py b/parakeet/exps/voice_cloning/tacotron2_ge2e/train.py similarity index 97% rename from parakeet/exps/tacotron2_ge2e/train.py rename to parakeet/exps/voice_cloning/tacotron2_ge2e/train.py index 35878a1b5..1a9bd8cb9 100644 --- a/parakeet/exps/tacotron2_ge2e/train.py +++ b/parakeet/exps/voice_cloning/tacotron2_ge2e/train.py @@ -23,9 +23,9 @@ from paddle.io import DataLoader from paddle.io import DistributedBatchSampler from parakeet.data import dataset -from parakeet.exps.tacotron2_ge2e.aishell3 import AiShell3 -from parakeet.exps.tacotron2_ge2e.aishell3 import collate_aishell3_examples -from parakeet.exps.tacotron2_ge2e.config import get_cfg_defaults +from parakeet.exps.voice_cloning.tacotron2_ge2e.aishell3 import AiShell3 +from parakeet.exps.voice_cloning.tacotron2_ge2e.aishell3 import collate_aishell3_examples +from parakeet.exps.voice_cloning.tacotron2_ge2e.config import get_cfg_defaults from parakeet.models.tacotron2 import Tacotron2 from parakeet.models.tacotron2 import Tacotron2Loss from parakeet.training.cli import default_argument_parser diff --git a/parakeet/exps/tacotron2_ge2e/voice_cloning.py b/parakeet/exps/voice_cloning/tacotron2_ge2e/voice_cloning.py similarity index 96% rename from parakeet/exps/tacotron2_ge2e/voice_cloning.py rename to parakeet/exps/voice_cloning/tacotron2_ge2e/voice_cloning.py index 269b0c18f..8afd35b77 100644 --- a/parakeet/exps/tacotron2_ge2e/voice_cloning.py +++ b/parakeet/exps/voice_cloning/tacotron2_ge2e/voice_cloning.py @@ -21,9 +21,9 @@ import soundfile as sf from matplotlib import pyplot as plt from parakeet.exps.ge2e.audio_processor import SpeakerVerificationPreprocessor -from parakeet.exps.tacotron2_ge2e.aishell3 import voc_phones -from parakeet.exps.tacotron2_ge2e.aishell3 import voc_tones -from parakeet.exps.tacotron2_ge2e.chinese_g2p import convert_sentence +from parakeet.exps.voice_cloning.tacotron2_ge2e.aishell3 import voc_phones +from parakeet.exps.voice_cloning.tacotron2_ge2e.aishell3 import voc_tones +from parakeet.exps.voice_cloning.tacotron2_ge2e.chinese_g2p import convert_sentence from parakeet.models.lstm_speaker_encoder import LSTMSpeakerEncoder from parakeet.models.tacotron2 import Tacotron2 from parakeet.models.waveflow import ConditionalWaveFlow diff --git a/tests/benchmark/pwgan/README.md b/tests/benchmark/pwgan/README.md index 3d2267aeb..369f4b74f 100644 --- a/tests/benchmark/pwgan/README.md +++ b/tests/benchmark/pwgan/README.md @@ -4,8 +4,8 @@ ``` 即可运行. 执行逻辑: -1. cd 到 ../../../ (也就是 Parakeet 目录) +1. cd 到 ../../../ (也就是 Deepspeech 目录) 2. 安装 parakeet 所需依赖 3. 从 bos 下载数据集并解压缩 -4. 预处理数据集为训练 pwg 所需格式,保存到 Parakeet/dump 文件夹底下 +4. 预处理数据集为训练 pwg 所需格式,保存到 Deepspeech/dump 文件夹底下 5. 按照不同的参数执行 run_benchmark.sh 脚本 diff --git a/tests/benchmark/pwgan/run_all.sh b/tests/benchmark/pwgan/run_all.sh index e26db3178..e4bb17f8c 100755 --- a/tests/benchmark/pwgan/run_all.sh +++ b/tests/benchmark/pwgan/run_all.sh @@ -10,6 +10,9 @@ cd ../../../ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then sudo apt-get install libsndfile1 pip install -e . + pushd examples/csmsc/voc1 + source path.sh + popd fi # 2 拷贝该模型需要数据、预训练模型 # 下载 baker 数据集到 home 目录下并解压缩到 home 目录下 @@ -22,15 +25,14 @@ fi # 数据预处理 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - python examples/GANVocoder/preprocess.py --rootdir=BZNSYP/ --dumpdir=dump --num-cpu=20 --cut-sil=True --dur-file=durations.txt --config=examples/GANVocoder/parallelwave_gan/baker/conf/default.yaml - python utils/compute_statistics.py --metadata=dump/train/raw/metadata.jsonl --field-name="feats" - python examples/GANVocoder/normalize.py --metadata=dump/train/raw/metadata.jsonl --dumpdir=dump/train/norm --stats=dump/train/feats_stats.npy - python examples/GANVocoder/normalize.py --metadata=dump/dev/raw/metadata.jsonl --dumpdir=dump/dev/norm --stats=dump/train/feats_stats.npy - python examples/GANVocoder/normalize.py --metadata=dump/test/raw/metadata.jsonl --dumpdir=dump/test/norm --stats=dump/train/feats_stats.npy + python3 parakeet/exps/gan_vocoder/preprocess.py --rootdir=BZNSYP/ --dumpdir=dump --num-cpu=20 --cut-sil=True --dur-file=durations.txt --config=examples/csmsc/voc1/conf/default.yaml + python3 utils/compute_statistics.py --metadata=dump/train/raw/metadata.jsonl --field-name="feats" + python3 parakeet/exps/gan_vocoder/normalize.py --metadata=dump/train/raw/metadata.jsonl --dumpdir=dump/train/norm --stats=dump/train/feats_stats.npy + python3 parakeet/exps/gan_vocoder/normalize.py --metadata=dump/dev/raw/metadata.jsonl --dumpdir=dump/dev/norm --stats=dump/train/feats_stats.npy + python3 parakeet/exps/gan_vocoder/normalize.py --metadata=dump/test/raw/metadata.jsonl --dumpdir=dump/test/norm --stats=dump/train/feats_stats.npy fi # 3 批量运行(如不方便批量,1,2需放到单个模型中) if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - model_mode_list=(pwg) fp_item_list=(fp32) # 满 bs 是 26 @@ -40,7 +42,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then for bs_item in ${bs_item_list[@]}; do echo "index is speed, 1gpus, begin, ${model_name}" run_mode=sp - CUDA_VISIBLE_DEVICES=0 bash tests/benchmark/PWGAN/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 100 ${model_mode} # (5min) + CUDA_VISIBLE_DEVICES=0 bash tests/benchmark/pwgan/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 100 ${model_mode} # (5min) sleep 60 echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}" run_mode=mp diff --git a/tests/benchmark/pwgan/run_benchmark.sh b/tests/benchmark/pwgan/run_benchmark.sh index bcdccccf4..e961e442b 100755 --- a/tests/benchmark/pwgan/run_benchmark.sh +++ b/tests/benchmark/pwgan/run_benchmark.sh @@ -24,13 +24,13 @@ function _train(){ --max-iter=${max_iter} --train-metadata=dump/train/norm/metadata.jsonl \ --dev-metadata=dump/dev/norm/metadata.jsonl \ - --config=examples/GANVocoder/parallelwave_gan/baker/conf/default.yaml \ + --config=examples/csmsc/voc1/conf/default.yaml \ --output-dir=exp/default \ --run-benchmark=true" case ${run_mode} in - sp) train_cmd="python3 examples/GANVocoder/parallelwave_gan/train.py --nprocs=1 ${train_cmd}" ;; - mp) train_cmd="python3 examples/GANVocoder/parallelwave_gan/train.py --nprocs=8 ${train_cmd}" + sp) train_cmd="python3 parakeet/exps/gan_vocoder/parallelwave_gan/train.py --nprocs=1 ${train_cmd}" ;; + mp) train_cmd="python3 parakeet/exps/gan_vocoder/parallelwave_gan/train.py --nprocs=8 ${train_cmd}" log_parse_file="mylog/workerlog.0" ;; *) echo "choose run_mode(sp or mp)"; exit 1; esac diff --git a/tests/chains/speedyspeech/lite_train_infer.sh b/tests/chains/speedyspeech/lite_train_infer.sh old mode 100644 new mode 100755 diff --git a/tests/chains/speedyspeech/prepare.sh b/tests/chains/speedyspeech/prepare.sh old mode 100644 new mode 100755 diff --git a/tests/chains/speedyspeech/speedyspeech_params_lite_multi_gpu.txt b/tests/chains/speedyspeech/speedyspeech_params_lite_multi_gpu.txt index 980262418..ad3420521 100644 --- a/tests/chains/speedyspeech/speedyspeech_params_lite_multi_gpu.txt +++ b/tests/chains/speedyspeech/speedyspeech_params_lite_multi_gpu.txt @@ -13,7 +13,7 @@ null:null null:null ## trainer:norm_train -norm_train:../../examples/speedyspeech/train.py --train-metadata=train_data/mini_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/mini_BZNSYP/dev/norm/metadata.jsonl --config=../../examples/speedyspeech/baker/conf/default.yaml --batch_size=32 --max_epoch=20 --num_snapshots=10 --output-dir=exp/default --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt --use-relative-path=True +norm_train:../../../parakeet/exps/speedyspeech/train.py --train-metadata=train_data/mini_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/mini_BZNSYP/dev/norm/metadata.jsonl --config=../../../examples/csmsc/tts2/conf/default.yaml --batch_size=32 --max_epoch=20 --num_snapshots=10 --output-dir=exp/default --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt --use-relative-path=True null:null null:null null:null @@ -21,7 +21,7 @@ null:null null:null ## ===========================eval_params=========================== -eval:../../examples/speedyspeech/baker/synthesize_e2e.py --speedyspeech-config=../../examples/speedyspeech/baker/conf/default.yaml --speedyspeech-checkpoint=exp/default/checkpoints/snapshot_iter_20.pdz --speedyspeech-stat=train_data/mini_BZNSYP/train/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../examples/speedyspeech/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt +eval:../../../parakeet/exps/speedyspeech/synthesize_e2e.py --speedyspeech-config=../../../examples/csmsc/tts2/conf/default.yaml --speedyspeech-checkpoint=exp/default/checkpoints/snapshot_iter_20.pdz --speedyspeech-stat=train_data/mini_BZNSYP/train/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../../parakeet/exps/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt null:null ## ===========================infer_params=========================== @@ -37,7 +37,7 @@ null:null null:null null:null null:null -inference:../../examples/speedyspeech/baker/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../examples/speedyspeech/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt +inference:../../../parakeet/exps/speedyspeech/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../../parakeet/exps/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt null:null null:null null:null diff --git a/tests/chains/speedyspeech/speedyspeech_params_lite_single_gpu.txt b/tests/chains/speedyspeech/speedyspeech_params_lite_single_gpu.txt index e821183aa..eaad714de 100644 --- a/tests/chains/speedyspeech/speedyspeech_params_lite_single_gpu.txt +++ b/tests/chains/speedyspeech/speedyspeech_params_lite_single_gpu.txt @@ -13,7 +13,7 @@ null:null null:null ## trainer:norm_train -norm_train:../../examples/speedyspeech/train.py --train-metadata=train_data/mini_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/mini_BZNSYP/dev/norm/metadata.jsonl --config=../../examples/speedyspeech/baker/conf/default.yaml --batch_size=32 --max_epoch=10 --num_snapshots=10 --output-dir=exp/default --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt --use-relative-path=True +norm_train:../../../parakeet/exps/speedyspeech/train.py --train-metadata=train_data/mini_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/mini_BZNSYP/dev/norm/metadata.jsonl --config=../../../examples/csmsc/tts2/conf/default.yaml --batch_size=32 --max_epoch=10 --num_snapshots=10 --output-dir=exp/default --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt --use-relative-path=True null:null null:null null:null @@ -21,7 +21,7 @@ null:null null:null ## ===========================eval_params=========================== -eval:../../examples/speedyspeech/baker/synthesize_e2e.py --speedyspeech-config=../../examples/speedyspeech/baker/conf/default.yaml --speedyspeech-checkpoint=exp/default/checkpoints/snapshot_iter_30.pdz --speedyspeech-stat=train_data/mini_BZNSYP/train/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../examples/speedyspeech/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt +eval:../../../parakeet/exps/speedyspeech/synthesize_e2e.py --speedyspeech-config=../../../examples/csmsc/tts2/conf/default.yaml --speedyspeech-checkpoint=exp/default/checkpoints/snapshot_iter_30.pdz --speedyspeech-stat=train_data/mini_BZNSYP/train/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../../parakeet/exps/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=train_data/mini_BZNSYP/phone_id_map.txt --tones-dict=train_data/mini_BZNSYP/tone_id_map.txt null:null ## ===========================infer_params=========================== @@ -37,7 +37,7 @@ null:null null:null null:null null:null -inference:../../examples/speedyspeech/baker/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../examples/speedyspeech/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt +inference:../../../parakeet/exps/speedyspeech/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../../parakeet/exps/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt --use_gpu:True null:null null:null diff --git a/tests/chains/speedyspeech/speedyspeech_params_whole_multi_gpu.txt b/tests/chains/speedyspeech/speedyspeech_params_whole_multi_gpu.txt index 7c5171197..236805fc5 100644 --- a/tests/chains/speedyspeech/speedyspeech_params_whole_multi_gpu.txt +++ b/tests/chains/speedyspeech/speedyspeech_params_whole_multi_gpu.txt @@ -13,7 +13,7 @@ null:null null:null ## trainer:norm_train -norm_train:../../examples/speedyspeech/train.py --train-metadata=train_data/processed_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/processed_BZNSYP/dev/norm/metadata.jsonl --config=../../examples/speedyspeech/baker/conf/default.yaml --output-dir=exp/whole --phones-dict=train_data/processed_BZNSYP/phone_id_map.txt --tones-dict=train_data/processed_BZNSYP/tone_id_map.txt --use-relative-path=True +norm_train:../../../parakeet/exps/speedyspeech/train.py --train-metadata=train_data/processed_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/processed_BZNSYP/dev/norm/metadata.jsonl --config=../../../examples/csmsc/tts2/conf/default.yaml --output-dir=exp/whole --phones-dict=train_data/processed_BZNSYP/phone_id_map.txt --tones-dict=train_data/processed_BZNSYP/tone_id_map.txt --use-relative-path=True null:null null:null null:null @@ -21,7 +21,7 @@ null:null null:null ## ===========================eval_params=========================== -eval:../../examples/speedyspeech/baker/synthesize_e2e.py --speedyspeech-config=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/default.yaml --speedyspeech-checkpoint=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/snapshot_iter_11400.pdz --speedyspeech-stat=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../examples/speedyspeech/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt +eval:../../../parakeet/exps/speedyspeech/synthesize_e2e.py --speedyspeech-config=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/default.yaml --speedyspeech-checkpoint=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/snapshot_iter_11400.pdz --speedyspeech-stat=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../../parakeet/exps/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt null:null ## ===========================infer_params=========================== @@ -37,7 +37,7 @@ null:null null:null null:null null:null -inference:../../examples/speedyspeech/baker/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../examples/speedyspeech/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt +inference:../../../parakeet/exps/speedyspeech/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../../parakeet/exps/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt null:null null:null null:null diff --git a/tests/chains/speedyspeech/speedyspeech_params_whole_single_gpu.txt b/tests/chains/speedyspeech/speedyspeech_params_whole_single_gpu.txt index 9a6c611e1..9caeb18e7 100644 --- a/tests/chains/speedyspeech/speedyspeech_params_whole_single_gpu.txt +++ b/tests/chains/speedyspeech/speedyspeech_params_whole_single_gpu.txt @@ -13,7 +13,7 @@ null:null null:null ## trainer:norm_train -norm_train:../../examples/speedyspeech/train.py --train-metadata=train_data/processed_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/processed_BZNSYP/dev/norm/metadata.jsonl --config=../../examples/speedyspeech/baker/conf/default.yaml --output-dir=exp/whole --phones-dict=train_data/processed_BZNSYP/phone_id_map.txt --tones-dict=train_data/processed_BZNSYP/tone_id_map.txt --use-relative-path=True +norm_train:../../../parakeet/exps/speedyspeech/train.py --train-metadata=train_data/processed_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/processed_BZNSYP/dev/norm/metadata.jsonl --config=../../../examples/csmsc/tts2/conf/default.yaml --output-dir=exp/whole --phones-dict=train_data/processed_BZNSYP/phone_id_map.txt --tones-dict=train_data/processed_BZNSYP/tone_id_map.txt --use-relative-path=True null:null null:null null:null @@ -21,7 +21,7 @@ null:null null:null ## ===========================eval_params=========================== -eval:../../examples/speedyspeech/baker/synthesize_e2e.py --speedyspeech-config=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/default.yaml --speedyspeech-checkpoint=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/snapshot_iter_11400.pdz --speedyspeech-stat=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../examples/speedyspeech/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt +eval:../../../parakeet/exps/speedyspeech/synthesize_e2e.py --speedyspeech-config=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/default.yaml --speedyspeech-checkpoint=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/snapshot_iter_11400.pdz --speedyspeech-stat=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/feats_stats.npy --pwg-config=pretrain_models/pwg_baker_ckpt_0.4/pwg_default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../../../parakeet/exps/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt null:null ## ===========================infer_params=========================== @@ -37,7 +37,7 @@ null:null null:null null:null null:null -inference:../../examples/speedyspeech/baker/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../examples/speedyspeech/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt +inference:../../../parakeet/exps/speedyspeech/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.5 --text=../../../parakeet/exps/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/phone_id_map.txt --tones-dict=pretrain_models/speedyspeech_pwg_inference_0.5/tone_id_map.txt null:null null:null null:null diff --git a/tests/chains/speedyspeech/test.sh b/tests/chains/speedyspeech/test.sh old mode 100644 new mode 100755 diff --git a/tests/chains/speedyspeech/whole_train_infer.sh b/tests/chains/speedyspeech/whole_train_infer.sh old mode 100644 new mode 100755