From b68c9c05c4c38ee5c5765db733aed8062eee4439 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Tue, 2 Nov 2021 02:54:16 +0000
Subject: [PATCH] fix fs2 inference bug

---
 examples/csmsc/tts2/README.md                | 2 +-
 examples/csmsc/tts3/README.md                | 8 ++++++++
 examples/csmsc/tts3/{ => local}/inference.sh | 0
 examples/csmsc/tts3/run.sh                   | 5 +++++
 parakeet/exps/fastspeech2/inference.py       | 3 ++-
 5 files changed, 16 insertions(+), 2 deletions(-)
 rename examples/csmsc/tts3/{ => local}/inference.sh (100%)

diff --git a/examples/csmsc/tts2/README.md b/examples/csmsc/tts2/README.md
index 4283e8cc..18eaf2f8 100644
--- a/examples/csmsc/tts2/README.md
+++ b/examples/csmsc/tts2/README.md
@@ -19,7 +19,7 @@ Run the command below to
 4. synthesize wavs.
     - synthesize waveform from `metadata.jsonl`.
     - synthesize waveform from text file.
-6. inference using static model.
+5. inference using static model.
 ```bash
 ./run.sh
 ```
diff --git a/examples/csmsc/tts3/README.md b/examples/csmsc/tts3/README.md
index 219bec79..cb202c3a 100644
--- a/examples/csmsc/tts3/README.md
+++ b/examples/csmsc/tts3/README.md
@@ -19,6 +19,7 @@ Run the command below to
 4. synthesize wavs.
     - synthesize waveform from `metadata.jsonl`.
     - synthesize waveform from text file.
+5. inference using static model.
 ```bash
 ./run.sh
 ```
@@ -189,6 +190,13 @@ optional arguments:
 5. `--output-dir` is the directory to save synthesized audio files.
 6. `--device is` the type of device to run synthesis, 'cpu' and 'gpu' are supported. 'gpu' is recommended for faster synthesis.
 
+### Inference
+After the Synthesize step, the static models of fastspeech2 and pwgan are saved in `${train_output_path}/inference`.
+`./local/inference.sh` calls `${BIN_DIR}/inference.py`, which provides a Paddle static model inference example for fastspeech2 + pwgan synthesis.
+```bash
+CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path}
+```
+
 ## Pretrained Model
 Pretrained FastSpeech2 model with no silence in the edge of audios. [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech2_nosil_baker_ckpt_0.4.zip)
 
diff --git a/examples/csmsc/tts3/inference.sh b/examples/csmsc/tts3/local/inference.sh
similarity index 100%
rename from examples/csmsc/tts3/inference.sh
rename to examples/csmsc/tts3/local/inference.sh
diff --git a/examples/csmsc/tts3/run.sh b/examples/csmsc/tts3/run.sh
index f45ddab0..718d6076 100755
--- a/examples/csmsc/tts3/run.sh
+++ b/examples/csmsc/tts3/run.sh
@@ -35,3 +35,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # synthesize_e2e, vocoder is pwgan
     CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
 fi
+
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    # inference with static model
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1
+fi
diff --git a/parakeet/exps/fastspeech2/inference.py b/parakeet/exps/fastspeech2/inference.py
index 9926541c..43676088 100644
--- a/parakeet/exps/fastspeech2/inference.py
+++ b/parakeet/exps/fastspeech2/inference.py
@@ -48,7 +48,8 @@ def main():
         str(Path(args.inference_dir) / "fastspeech2.pdmodel"),
         str(Path(args.inference_dir) / "fastspeech2.pdiparams"))
     fastspeech2_config.enable_use_gpu(50, 0)
-    fastspeech2_config.enable_memory_optim()
+    # Keep enable_memory_optim() commented out: enabling it here causes an out-of-memory (OOM) error.
+    # fastspeech2_config.enable_memory_optim()
     fastspeech2_predictor = inference.create_predictor(fastspeech2_config)
 
     pwg_config = inference.Config(