From 90b7f88eb5366c7ae2e54831b2c307f1d0a3d378 Mon Sep 17 00:00:00 2001
From: lym0302 <lym0302@foxmail.com>
Date: Mon, 6 Jun 2022 03:15:37 +0000
Subject: [PATCH] fix hifigan pad value

---
 demos/streaming_tts_server/README.md                      | 2 +-
 demos/streaming_tts_server/README_cn.md                   | 2 +-
 .../streaming_tts_server/conf/tts_online_application.yaml | 4 ++--
 paddlespeech/server/conf/tts_online_application.yaml      | 4 ++--
 tests/unit/server/online/tts/check_server/test.sh         | 8 ++++----
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/demos/streaming_tts_server/README.md b/demos/streaming_tts_server/README.md
index 775cd908..860d9a97 100644
--- a/demos/streaming_tts_server/README.md
+++ b/demos/streaming_tts_server/README.md
@@ -27,7 +27,7 @@ The configuration file can be found in `conf/tts_online_application.yaml`.
 - In streaming voc inference, one chunk of data is inferred at a time to achieve a streaming effect. Where `voc_block` indicates the number of valid frames in the chunk, and `voc_pad` indicates the number of frames added before and after the voc_block in a chunk. The existence of voc_pad is used to eliminate errors caused by streaming inference and avoid the influence of streaming inference on the quality of synthesized audio.
     - Both hifigan and mb_melgan support streaming voc inference.
     - When the voc model is mb_melgan, when voc_pad=14, the synthetic audio for streaming inference is consistent with the non-streaming synthetic audio; the minimum voc_pad can be set to 7, and the synthetic audio has no abnormal hearing. If the voc_pad is less than 7, the synthetic audio sounds abnormal.
-    - When the voc model is hifigan, when voc_pad=20, the streaming inference synthetic audio is consistent with the non-streaming synthetic audio; when voc_pad=14, the synthetic audio has no abnormal hearing.
+    - When the voc model is hifigan and voc_pad=19, the streaming inference synthetic audio is consistent with the non-streaming synthetic audio; with voc_pad=14, the synthetic audio has no audible abnormality.
 - Inference speed: mb_melgan > hifigan; Audio quality: mb_melgan < hifigan
 - **Note:** If the service can be started normally in the container, but the client access IP is unreachable, you can try to replace the `host` address in the configuration file with the local IP address.
 
diff --git a/demos/streaming_tts_server/README_cn.md b/demos/streaming_tts_server/README_cn.md
index 9c2cc50e..254ec26a 100644
--- a/demos/streaming_tts_server/README_cn.md
+++ b/demos/streaming_tts_server/README_cn.md
@@ -27,7 +27,7 @@
 - 流式 voc 推理中，每次会对一个 chunk 的数据进行推理以达到流式的效果。其中 `voc_block` 表示chunk中的有效帧数，`voc_pad` 表示一个 chunk 中 voc_block 前后各加的帧数。voc_pad 的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
     - hifigan, mb_melgan 均支持流式 voc 推理
     - 当 voc 模型为 mb_melgan，当 voc_pad=14 时，流式推理合成音频与非流式合成音频一致；voc_pad 最小可以设置为7，合成音频听感上没有异常，若 voc_pad 小于7，合成音频听感上存在异常。
-    - 当 voc 模型为 hifigan，当 voc_pad=20 时，流式推理合成音频与非流式合成音频一致；当 voc_pad=14 时，合成音频听感上没有异常。
+    - 当 voc 模型为 hifigan，当 voc_pad=19 时，流式推理合成音频与非流式合成音频一致；当 voc_pad=14 时，合成音频听感上没有异常。
 - 推理速度：mb_melgan > hifigan; 音频质量：mb_melgan < hifigan
 - **注意：** 如果在容器里可正常启动服务，但客户端访问 ip 不可达，可尝试将配置文件中 `host` 地址换成本地 ip 地址。
 
diff --git a/demos/streaming_tts_server/conf/tts_online_application.yaml b/demos/streaming_tts_server/conf/tts_online_application.yaml
index 964e85ef..0460a5e1 100644
--- a/demos/streaming_tts_server/conf/tts_online_application.yaml
+++ b/demos/streaming_tts_server/conf/tts_online_application.yaml
@@ -47,7 +47,7 @@ tts_online:
     am_pad: 12
     # voc_pad and voc_block voc model to streaming voc infer,
     # when voc model is mb_melgan_csmsc, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
-    # when voc model is hifigan_csmsc, voc_pad set 20, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
+    # when the voc model is hifigan_csmsc, setting voc_pad to 19 makes the streaming synthetic audio the same as the non-streaming synthetic audio; with voc_pad set to 14, the streaming synthetic audio still sounds normal
     voc_block: 36
     voc_pad: 14
     
@@ -95,7 +95,7 @@ tts_online-onnx:
     am_pad: 12
     # voc_pad and voc_block voc model to streaming voc infer,
     # when voc model is mb_melgan_csmsc_onnx, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
-    # when voc model is hifigan_csmsc_onnx, voc_pad set 20, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
+    # when the voc model is hifigan_csmsc_onnx, setting voc_pad to 19 makes the streaming synthetic audio the same as the non-streaming synthetic audio; with voc_pad set to 14, the streaming synthetic audio still sounds normal
     voc_block: 36
     voc_pad: 14
     # voc_upsample should be same as n_shift on voc config.
diff --git a/paddlespeech/server/conf/tts_online_application.yaml b/paddlespeech/server/conf/tts_online_application.yaml
index 964e85ef..0460a5e1 100644
--- a/paddlespeech/server/conf/tts_online_application.yaml
+++ b/paddlespeech/server/conf/tts_online_application.yaml
@@ -47,7 +47,7 @@ tts_online:
     am_pad: 12
     # voc_pad and voc_block voc model to streaming voc infer,
     # when voc model is mb_melgan_csmsc, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
-    # when voc model is hifigan_csmsc, voc_pad set 20, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
+    # when the voc model is hifigan_csmsc, setting voc_pad to 19 makes the streaming synthetic audio the same as the non-streaming synthetic audio; with voc_pad set to 14, the streaming synthetic audio still sounds normal
     voc_block: 36
     voc_pad: 14
     
@@ -95,7 +95,7 @@ tts_online-onnx:
     am_pad: 12
     # voc_pad and voc_block voc model to streaming voc infer,
     # when voc model is mb_melgan_csmsc_onnx, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
-    # when voc model is hifigan_csmsc_onnx, voc_pad set 20, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
+    # when the voc model is hifigan_csmsc_onnx, setting voc_pad to 19 makes the streaming synthetic audio the same as the non-streaming synthetic audio; with voc_pad set to 14, the streaming synthetic audio still sounds normal
     voc_block: 36
     voc_pad: 14
     # voc_upsample should be same as n_shift on voc config.
diff --git a/tests/unit/server/online/tts/check_server/test.sh b/tests/unit/server/online/tts/check_server/test.sh
index 766aea85..c62c54c7 100644
--- a/tests/unit/server/online/tts/check_server/test.sh
+++ b/tests/unit/server/online/tts/check_server/test.sh
@@ -28,7 +28,7 @@ StartService(){
 ClientTest_http(){
     for ((i=1; i<=3;i++))
     do
-    paddlespeech_client tts_online --input "您好，欢迎使用百度飞桨深度学习框架。" 
+    paddlespeech_client tts_online --input "您好，欢迎使用百度飞桨深度学习框架。" --port $port
     ((http_test_times+=1))
     done
 }
@@ -36,7 +36,7 @@ ClientTest_http(){
 ClientTest_ws(){
     for ((i=1; i<=3;i++))
     do
-    paddlespeech_client tts_online --input "您好，欢迎使用百度飞桨深度学习框架。" --protocol websocket
+    paddlespeech_client tts_online --input "您好，欢迎使用百度飞桨深度学习框架。" --protocol websocket --port $port
     ((ws_test_times+=1))
     done
 }
@@ -54,7 +54,7 @@ GetTestResult_http() {
 
 GetTestResult_ws() {
     # Determine if the test was successful
-    ws_response_success_time=$(cat $log/server.log.wf | grep "Complete the transmission of audio streams" -c)
+    ws_response_success_time=$(cat $log/server.log.wf | grep "Complete the synthesis of the audio streams" -c)
     if (( $ws_response_success_time == $ws_test_times )) ; then
         echo "Testing successfully. $info"  | tee -a $log/test_result.log
     else
@@ -313,4 +313,4 @@ cat $log/test_result.log
 
 # Restoring conf is the same as demos/speech_server
 cp ./tts_online_application.yaml ./conf/application.yaml -rf
-sleep 2s
\ No newline at end of file
+sleep 2s