From 96b5b18c2ecaa1bba242c85d6134b373b43bcdb7 Mon Sep 17 00:00:00 2001
From: rich04lin <152049331+rich04lin@users.noreply.github.com>
Date: Wed, 19 Mar 2025 16:52:30 +0800
Subject: [PATCH] num5

---
 examples/csmsc/tts0/README.md | 51 ++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/examples/csmsc/tts0/README.md b/examples/csmsc/tts0/README.md
index 8fef32845..9aa46cc87 100644
--- a/examples/csmsc/tts0/README.md
+++ b/examples/csmsc/tts0/README.md
@@ -99,6 +99,55 @@ pwg_baker_ckpt_0.4
 ```
 `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
 ```bash
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+```
+`--stage` controls which vocoder is used during synthesis: it can be `0` or `1`, selecting the `pwgan` or `hifigan` model as the vocoder, respectively.
+```text
+usage: synthesize.py [-h]
+                     [--am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}]
+                     [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
+                     [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
+                     [--tones_dict TONES_DICT] [--speaker_dict SPEAKER_DICT]
+                     [--voice-cloning VOICE_CLONING]
+                     [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}]
+                     [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
+                     [--voc_stat VOC_STAT] [--ngpu NGPU]
+                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]
+
+Synthesize with acoustic model & vocoder
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}
+                        Choose acoustic model type of tts task.
+  --am_config AM_CONFIG
+                        Config of acoustic model.
+  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
+  --am_stat AM_STAT     mean and standard deviation used to normalize
+                        spectrogram when training acoustic model.
+  --phones_dict PHONES_DICT
+                        phone vocabulary file.
+  --tones_dict TONES_DICT
+                        tone vocabulary file.
+  --speaker_dict SPEAKER_DICT
+                        speaker id map file.
+  --voice-cloning VOICE_CLONING
+                        whether training voice cloning model.
+  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}
+                        Choose vocoder type of tts task.
+  --voc_config VOC_CONFIG
+                        Config of voc.
+  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
+  --voc_stat VOC_STAT   mean and standard deviation used to normalize
+                        spectrogram when training voc.
+  --ngpu NGPU           if ngpu == 0, use cpu.
+  --test_metadata TEST_METADATA
+                        test metadata.
+  --output_dir OUTPUT_DIR
+                        output dir.
+```
+`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveforms from a text file.
+```bash
 CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
 ```
 `--stage` controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder.
@@ -199,4 +248,4 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \
   --output_dir=exp/default/test_e2e \
   --inference_dir=exp/default/inference \
   --phones_dict=tacotron2_csmsc_ckpt_0.2.0/phone_id_map.txt
-```
+```
\ No newline at end of file
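
For readers trying the documented command outside the example's own driver script, the `${gpus}`, `${conf_path}`, `${train_output_path}`, and `${ckpt_name}` variables have to be set by hand; in the example they are normally supplied by `run.sh`. A minimal sketch follows, and every concrete value in it (the config path, output directory, and checkpoint name) is an illustrative assumption, not something stated in the patch:

```bash
# Minimal sketch of a manual invocation. All concrete values below are
# illustrative assumptions; run.sh normally provides them.
gpus=0                            # GPU id(s) to expose to the script
conf_path=conf/default.yaml       # acoustic model config (assumed path)
train_output_path=exp/default     # training output dir (assumed path)
ckpt_name=snapshot_iter_30600.pdz # trained checkpoint (assumed name)

# --stage 0 selects the pwgan vocoder; --stage 1 would select hifigan.
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
```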
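The help text added by this patch also makes it possible to call `synthesize.py` directly rather than through `./local/synthesize.sh`. Below is a sketch assembled only from the flags in that help text; every file path and file name in it is an assumption about the layout of a trained example and the `pwg_baker_ckpt_0.4` download, not something the patch specifies:

```bash
# Sketch of a direct synthesize.py call built from the documented flags.
# All paths and file names are assumptions for illustration only.
python3 ${BIN_DIR}/../synthesize.py \
  --am=tacotron2_csmsc \
  --am_config=conf/default.yaml \
  --am_ckpt=exp/default/checkpoints/snapshot_iter_30600.pdz \
  --am_stat=dump/train/speech_stats.npy \
  --voc=pwgan_csmsc \
  --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \
  --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
  --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
  --test_metadata=dump/test/norm/metadata.jsonl \
  --output_dir=exp/default/test \
  --phones_dict=dump/phone_id_map.txt
```

Passing `--ngpu=0` would run the same synthesis on CPU, per the help text.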