fix fs2 inference bug

3 years ago · b68c9c05c4
parent 79e7a4d44e
commit b68c9c05c4
5 changed files with 16 additions and 2 deletions
--- a/examples/csmsc/tts2/README.md
+++ b/examples/csmsc/tts2/README.md
@ -19,7 +19,7 @@ Run the command below to
 4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.
-6. inference using static model.
+5. inference using static model.
 ```bash
 ./run.sh
 ```
--- a/examples/csmsc/tts3/README.md
+++ b/examples/csmsc/tts3/README.md
@ -19,6 +19,7 @@ Run the command below to
 4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.
+5. inference using static model.
 ```bash
 ./run.sh
 ```
@ -189,6 +190,13 @@ optional arguments:
 5. `--output-dir` is the directory to save synthesized audio files.
 6. `--device` is the type of device to run synthesis, 'cpu' and 'gpu' are supported. 'gpu' is recommended for faster synthesis.

+### Inference
+After synthesizing, we will get static models of fastspeech2 and pwgan in `${train_output_path}/inference`.
+`./local/inference.sh` calls `${BIN_DIR}/inference.py`, which provides a paddle static model inference example for fastspeech2 + pwgan synthesis.
+```bash
+CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path}
+```
+
 ## Pretrained Model
 Pretrained FastSpeech2 model with no silence in the edge of audios. [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/fastspeech2_nosil_baker_ckpt_0.4.zip)

--- a/examples/csmsc/tts3/local/inference.sh
+++ b/examples/csmsc/tts3/local/inference.sh
--- a/examples/csmsc/tts3/run.sh
+++ b/examples/csmsc/tts3/run.sh
@ -35,3 +35,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # synthesize_e2e, vocoder is pwgan
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
 fi
+
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    # inference with static model
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1
+fi
--- a/parakeet/exps/fastspeech2/inference.py
+++ b/parakeet/exps/fastspeech2/inference.py
@ -48,7 +48,8 @@ def main():
        str(Path(args.inference_dir) / "fastspeech2.pdmodel"),
        str(Path(args.inference_dir) / "fastspeech2.pdiparams"))
    fastspeech2_config.enable_use_gpu(50, 0)
-    fastspeech2_config.enable_memory_optim()
+    # Keep the following line commented out; otherwise inference will run out of memory (OOM).
+    # fastspeech2_config.enable_memory_optim()
    fastspeech2_predictor = inference.create_predictor(fastspeech2_config)

    pwg_config = inference.Config(