From b68c9c05c4c38ee5c5765db733aed8062eee4439 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Tue, 2 Nov 2021 02:54:16 +0000 Subject: [PATCH] fix fs2 inference bug --- examples/csmsc/tts2/README.md | 2 +- examples/csmsc/tts3/README.md | 8 ++++++++ examples/csmsc/tts3/{ => local}/inference.sh | 0 examples/csmsc/tts3/run.sh | 5 +++++ parakeet/exps/fastspeech2/inference.py | 3 ++- 5 files changed, 16 insertions(+), 2 deletions(-) rename examples/csmsc/tts3/{ => local}/inference.sh (100%) diff --git a/examples/csmsc/tts2/README.md b/examples/csmsc/tts2/README.md index 4283e8cc..18eaf2f8 100644 --- a/examples/csmsc/tts2/README.md +++ b/examples/csmsc/tts2/README.md @@ -19,7 +19,7 @@ Run the command below to 4. synthesize wavs. - synthesize waveform from `metadata.jsonl`. - synthesize waveform from text file. -6. inference using static model. +5. inference using static model. ```bash ./run.sh ``` diff --git a/examples/csmsc/tts3/README.md b/examples/csmsc/tts3/README.md index 219bec79..cb202c3a 100644 --- a/examples/csmsc/tts3/README.md +++ b/examples/csmsc/tts3/README.md @@ -19,6 +19,7 @@ Run the command below to 4. synthesize wavs. - synthesize waveform from `metadata.jsonl`. - synthesize waveform from text file. +5. inference using static model. ```bash ./run.sh ``` @@ -189,6 +190,13 @@ optional arguments: 5. `--output-dir` is the directory to save synthesized audio files. 6. `--device is` the type of device to run synthesis, 'cpu' and 'gpu' are supported. 'gpu' is recommended for faster synthesis. +### Inference +After synthesizing, we will get static models of fastspeech2 and pwgan in `${train_output_path}/inference`. +`./local/inference.sh` calls `${BIN_DIR}/inference.py`, which provides a Paddle static model inference example for fastspeech2 + pwgan synthesis. +```bash +CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} +``` + ## Pretrained Model Pretrained FastSpeech2 model with no silence in the edge of audios. 
[fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech2_nosil_baker_ckpt_0.4.zip) diff --git a/examples/csmsc/tts3/inference.sh b/examples/csmsc/tts3/local/inference.sh similarity index 100% rename from examples/csmsc/tts3/inference.sh rename to examples/csmsc/tts3/local/inference.sh diff --git a/examples/csmsc/tts3/run.sh b/examples/csmsc/tts3/run.sh index f45ddab0..718d6076 100755 --- a/examples/csmsc/tts3/run.sh +++ b/examples/csmsc/tts3/run.sh @@ -35,3 +35,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then # synthesize_e2e, vocoder is pwgan CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1 fi + +if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then + # inference with static model + CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1 +fi diff --git a/parakeet/exps/fastspeech2/inference.py b/parakeet/exps/fastspeech2/inference.py index 9926541c..43676088 100644 --- a/parakeet/exps/fastspeech2/inference.py +++ b/parakeet/exps/fastspeech2/inference.py @@ -48,7 +48,8 @@ def main(): str(Path(args.inference_dir) / "fastspeech2.pdmodel"), str(Path(args.inference_dir) / "fastspeech2.pdiparams")) fastspeech2_config.enable_use_gpu(50, 0) - fastspeech2_config.enable_memory_optim() + # Keep the next line commented out; otherwise inference will run out of memory (OOM). + # fastspeech2_config.enable_memory_optim() fastspeech2_predictor = inference.create_predictor(fastspeech2_config) pwg_config = inference.Config(