From 83d93da8d023a7df319f2911af72f150b53f7807 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Fri, 2 Jun 2023 07:12:17 +0000
Subject: [PATCH] add scripts for tts code switch

---
 examples/zh_en_tts/tts3/.gitignore            |  2 ++
 examples/zh_en_tts/tts3/README.md             | 32 +++++++++----------
 examples/zh_en_tts/tts3/local/mfa_download.sh | 16 ++++++++++
 .../zh_en_tts/tts3/local/model_download.sh    | 13 ++++++++
 examples/zh_en_tts/tts3/run.sh                |  4 +--
 5 files changed, 49 insertions(+), 18 deletions(-)
 create mode 100644 examples/zh_en_tts/tts3/.gitignore
 create mode 100755 examples/zh_en_tts/tts3/local/mfa_download.sh
 create mode 100755 examples/zh_en_tts/tts3/local/model_download.sh

diff --git a/examples/zh_en_tts/tts3/.gitignore b/examples/zh_en_tts/tts3/.gitignore
new file mode 100644
index 000000000..bbd86a25b
--- /dev/null
+++ b/examples/zh_en_tts/tts3/.gitignore
@@ -0,0 +1,2 @@
+data
+exp
diff --git a/examples/zh_en_tts/tts3/README.md b/examples/zh_en_tts/tts3/README.md
index 012028007..1f04d41e7 100644
--- a/examples/zh_en_tts/tts3/README.md
+++ b/examples/zh_en_tts/tts3/README.md
@@ -6,11 +6,11 @@ This example contains code used to train a [Fastspeech2](https://arxiv.org/abs/2
 
 ## Dataset
 ### Download and Extract
-Download all datasets and extract it to `~/datasets`:
-- The CSMSC dataset is in the directory `~/datasets/BZNSYP`
-- The Ljspeech dataset is in the directory `~/datasets/LJSpeech-1.1`
-- The aishell3 dataset is in the directory `~/datasets/data_aishell3`
-- The vctk dataset is in the directory `~/datasets/VCTK-Corpus-0.92`
+Download all datasets and extract them to `./data`:
+- The CSMSC dataset is in the directory `./data/BZNSYP`
+- The Ljspeech dataset is in the directory `./data/LJSpeech-1.1`
+- The aishell3 dataset is in the directory `./data/data_aishell3`
+- The vctk dataset is in the directory `./data/VCTK-Corpus-0.92`
  
 ### Get MFA Result and Extract
 We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for the fastspeech2 training.
@@ -24,16 +24,16 @@ Or train your MFA model reference to [mfa example](https://github.com/PaddlePadd
 
 ## Get Started
 Assume the paths to the datasets are:
-- `~/datasets/BZNSYP`
-- `~/datasets/LJSpeech-1.1`
-- `~/datasets/data_aishell3` 
-- `~/datasets/VCTK-Corpus-0.92`
+- `./data/BZNSYP`
+- `./data/LJSpeech-1.1`
+- `./data/data_aishell3`
+- `./data/VCTK-Corpus-0.92`
 
 Assume the path to the MFA results of the datasets are:
-- `./mfa_results/baker_alignment_tone`
-- `./mfa_results/ljspeech_alignment`
-- `./mfa_results/aishell3_alignment_tone`
-- `./mfa_results/vctk_alignment`
+- `./data/mfa/baker_alignment_tone`
+- `./data/mfa/ljspeech_alignment`
+- `./data/mfa/aishell3_alignment_tone`
+- `./data/mfa/vctk_alignment`
 
 Run the command below to
 1. **source path**.
@@ -288,6 +288,9 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \
   --am_config=fastspeech2_mix_ckpt_1.2.0/default.yaml \
   --am_ckpt=fastspeech2_mix_ckpt_1.2.0/snapshot_iter_99200.pdz \
   --am_stat=fastspeech2_mix_ckpt_1.2.0/speech_stats.npy \
+  --phones_dict=fastspeech2_mix_ckpt_1.2.0/phone_id_map.txt \
+  --speaker_dict=fastspeech2_mix_ckpt_1.2.0/speaker_id_map.txt \
+  --spk_id=174 \
   --voc=pwgan_aishell3 \
   --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \
   --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \
@@ -295,8 +298,5 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \
   --lang=mix \
   --text=${BIN_DIR}/../sentences_mix.txt \
   --output_dir=exp/default/test_e2e \
-  --phones_dict=fastspeech2_mix_ckpt_1.2.0/phone_id_map.txt \
-  --speaker_dict=fastspeech2_mix_ckpt_1.2.0/speaker_id_map.txt \
-  --spk_id=174 \
   --inference_dir=exp/default/inference
 ```
diff --git a/examples/zh_en_tts/tts3/local/mfa_download.sh b/examples/zh_en_tts/tts3/local/mfa_download.sh
new file mode 100755
index 000000000..1863c896d
--- /dev/null
+++ b/examples/zh_en_tts/tts3/local/mfa_download.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+exp=exp
+mfa=$exp/mfa
+
+mkdir -p $mfa
+
+pushd $mfa
+
+wget -c https://paddlespeech.bj.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz &
+wget -c https://paddlespeech.bj.bcebos.com/MFA/LJSpeech-1.1/ljspeech_alignment.tar.gz &
+wget -c https://paddlespeech.bj.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz &
+wget -c https://paddlespeech.bj.bcebos.com/MFA/VCTK-Corpus-0.92/vctk_alignment.tar.gz &
+wait
+
+popd
diff --git a/examples/zh_en_tts/tts3/local/model_download.sh b/examples/zh_en_tts/tts3/local/model_download.sh
new file mode 100755
index 000000000..20a830b74
--- /dev/null
+++ b/examples/zh_en_tts/tts3/local/model_download.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Download the fastspeech2_mix_ckpt_1.2.0.zip model archive into exp/pretrain.
+exp=exp
+pretrain=$exp/pretrain
+
+mkdir -p "$pretrain"
+# Bail out if the directory cannot be entered, so nothing is fetched into the cwd.
+pushd "$pretrain" || exit 1
+
+# Single download: run it in the foreground so a wget failure fails the script.
+wget -c https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip || exit 1
+
+popd
diff --git a/examples/zh_en_tts/tts3/run.sh b/examples/zh_en_tts/tts3/run.sh
index a4d86480b..a18421f5a 100755
--- a/examples/zh_en_tts/tts3/run.sh
+++ b/examples/zh_en_tts/tts3/run.sh
@@ -7,8 +7,8 @@ gpus=0,1
 stage=0
 stop_stage=100
 
-datasets_root_dir=~/datasets
-mfa_root_dir=./mfa_results/
+datasets_root_dir=./data
+mfa_root_dir=./data/mfa
 conf_path=conf/default.yaml
 train_output_path=exp/default
 ckpt_name=snapshot_iter_99200.pdz