From 83d93da8d023a7df319f2911af72f150b53f7807 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Fri, 2 Jun 2023 07:12:17 +0000
Subject: [PATCH] add scripts for tts code switch

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit):
- local/mfa_download.sh downloads into data/mfa rather than exp/mfa, so the
  archives land under the mfa_root_dir (./data/mfa) that run.sh and the
  README use after this patch.
- Both download scripts now propagate wget failures instead of ending on a
  bare `wait`, which returns 0 regardless of how the downloads went.
- The "index" lines of the two rewritten scripts were dropped because their
  blob hashes would no longer match the new contents.

 examples/zh_en_tts/tts3/.gitignore            |  2 ++
 examples/zh_en_tts/tts3/README.md             | 32 ++++++++---------
 examples/zh_en_tts/tts3/local/mfa_download.sh | 36 +++++++++++++++++++
 .../zh_en_tts/tts3/local/model_download.sh    | 17 +++++++++
 examples/zh_en_tts/tts3/run.sh                |  4 +--
 5 files changed, 73 insertions(+), 18 deletions(-)
 create mode 100644 examples/zh_en_tts/tts3/.gitignore
 create mode 100755 examples/zh_en_tts/tts3/local/mfa_download.sh
 create mode 100755 examples/zh_en_tts/tts3/local/model_download.sh

diff --git a/examples/zh_en_tts/tts3/.gitignore b/examples/zh_en_tts/tts3/.gitignore
new file mode 100644
index 000000000..bbd86a25b
--- /dev/null
+++ b/examples/zh_en_tts/tts3/.gitignore
@@ -0,0 +1,2 @@
+data
+exp
diff --git a/examples/zh_en_tts/tts3/README.md b/examples/zh_en_tts/tts3/README.md
index 012028007..1f04d41e7 100644
--- a/examples/zh_en_tts/tts3/README.md
+++ b/examples/zh_en_tts/tts3/README.md
@@ -6,11 +6,11 @@ This example contains code used to train a [Fastspeech2](https://arxiv.org/abs/2
 
 ## Dataset
 ### Download and Extract
-Download all datasets and extract it to `~/datasets`:
-- The CSMSC dataset is in the directory `~/datasets/BZNSYP`
-- The Ljspeech dataset is in the directory `~/datasets/LJSpeech-1.1`
-- The aishell3 dataset is in the directory `~/datasets/data_aishell3`
-- The vctk dataset is in the directory `~/datasets/VCTK-Corpus-0.92`
+Download all datasets and extract them to `./data`:
+- The CSMSC dataset is in the directory `./data/BZNSYP`
+- The Ljspeech dataset is in the directory `./data/LJSpeech-1.1`
+- The aishell3 dataset is in the directory `./data/data_aishell3`
+- The vctk dataset is in the directory `./data/VCTK-Corpus-0.92`
 
 ### Get MFA Result and Extract
 We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for the fastspeech2 training.
@@ -24,16 +24,16 @@ Or train your MFA model reference to [mfa example](https://github.com/PaddlePadd
 
 ## Get Started
 Assume the paths to the datasets are:
-- `~/datasets/BZNSYP`
-- `~/datasets/LJSpeech-1.1`
-- `~/datasets/data_aishell3`
-- `~/datasets/VCTK-Corpus-0.92`
+- `./data/BZNSYP`
+- `./data/LJSpeech-1.1`
+- `./data/data_aishell3`
+- `./data/VCTK-Corpus-0.92`
 
 Assume the path to the MFA results of the datasets are:
-- `./mfa_results/baker_alignment_tone`
-- `./mfa_results/ljspeech_alignment`
-- `./mfa_results/aishell3_alignment_tone`
-- `./mfa_results/vctk_alignment`
+- `./data/mfa/baker_alignment_tone`
+- `./data/mfa/ljspeech_alignment`
+- `./data/mfa/aishell3_alignment_tone`
+- `./data/mfa/vctk_alignment`
 
 Run the command below to
 1. **source path**.
@@ -288,6 +288,9 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \
   --am_config=fastspeech2_mix_ckpt_1.2.0/default.yaml \
   --am_ckpt=fastspeech2_mix_ckpt_1.2.0/snapshot_iter_99200.pdz \
   --am_stat=fastspeech2_mix_ckpt_1.2.0/speech_stats.npy \
+  --phones_dict=fastspeech2_mix_ckpt_1.2.0/phone_id_map.txt \
+  --speaker_dict=fastspeech2_mix_ckpt_1.2.0/speaker_id_map.txt \
+  --spk_id=174 \
   --voc=pwgan_aishell3 \
   --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \
   --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \
@@ -295,8 +298,5 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \
   --lang=mix \
   --text=${BIN_DIR}/../sentences_mix.txt \
   --output_dir=exp/default/test_e2e \
-  --phones_dict=fastspeech2_mix_ckpt_1.2.0/phone_id_map.txt \
-  --speaker_dict=fastspeech2_mix_ckpt_1.2.0/speaker_id_map.txt \
-  --spk_id=174 \
   --inference_dir=exp/default/inference
 ```
diff --git a/examples/zh_en_tts/tts3/local/mfa_download.sh b/examples/zh_en_tts/tts3/local/mfa_download.sh
new file mode 100755
--- /dev/null
+++ b/examples/zh_en_tts/tts3/local/mfa_download.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Download the pre-computed MFA alignment archives into ./data/mfa, the
+# directory run.sh uses as mfa_root_dir. The archives are fetched in
+# parallel and left unextracted; unpack them in place before training.
+set -euo pipefail
+
+data=data
+mfa=$data/mfa
+
+mkdir -p "$mfa"
+
+pushd "$mfa"
+
+urls=(
+  https://paddlespeech.bj.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz
+  https://paddlespeech.bj.bcebos.com/MFA/LJSpeech-1.1/ljspeech_alignment.tar.gz
+  https://paddlespeech.bj.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz
+  https://paddlespeech.bj.bcebos.com/MFA/VCTK-Corpus-0.92/vctk_alignment.tar.gz
+)
+
+pids=()
+for url in "${urls[@]}"; do
+  wget -c "$url" &
+  pids+=("$!")
+done
+
+# A bare `wait` always returns 0; wait on each PID so that a failed
+# download makes the script exit non-zero.
+status=0
+for pid in "${pids[@]}"; do
+  wait "$pid" || status=1
+done
+
+popd
+
+exit "$status"
diff --git a/examples/zh_en_tts/tts3/local/model_download.sh b/examples/zh_en_tts/tts3/local/model_download.sh
new file mode 100755
--- /dev/null
+++ b/examples/zh_en_tts/tts3/local/model_download.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Download the fastspeech2_mix_ckpt_1.2.0 pretrained-model archive into
+# exp/pretrain.
+set -euo pipefail
+
+exp=exp
+pretrain=$exp/pretrain
+
+mkdir -p "$pretrain"
+
+pushd "$pretrain"
+
+# Run in the foreground: with a single download, `&` plus a bare `wait`
+# gains no parallelism and discards wget's exit status.
+wget -c https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip
+
+popd
diff --git a/examples/zh_en_tts/tts3/run.sh b/examples/zh_en_tts/tts3/run.sh
index a4d86480b..a18421f5a 100755
--- a/examples/zh_en_tts/tts3/run.sh
+++ b/examples/zh_en_tts/tts3/run.sh
@@ -7,8 +7,8 @@ gpus=0,1
 stage=0
 stop_stage=100
 
-datasets_root_dir=~/datasets
-mfa_root_dir=./mfa_results/
+datasets_root_dir=./data
+mfa_root_dir=./data/mfa
 conf_path=conf/default.yaml
 train_output_path=exp/default
 ckpt_name=snapshot_iter_99200.pdz