From 651835f62ededbe594bab8c7417ad79e94a9e036 Mon Sep 17 00:00:00 2001
From: liangym <34430015+lym0302@users.noreply.github.com>
Date: Mon, 25 Apr 2022 14:23:35 +0800
Subject: [PATCH 1/9] Update README_cn.md

---
 demos/streaming_tts_server/README_cn.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/demos/streaming_tts_server/README_cn.md b/demos/streaming_tts_server/README_cn.md
index 211dc3888..e99d67cf4 100644
--- a/demos/streaming_tts_server/README_cn.md
+++ b/demos/streaming_tts_server/README_cn.md
@@ -16,11 +16,11 @@
 
 ### 2. 准备配置文件
 配置文件可参见 `conf/tts_online_application.yaml` 。
-其中，`protocol`表示该流式TTS服务使用的网络协议，目前支持 http 和 websocket 两种。
-其中，`engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
+- `protocol`表示该流式TTS服务使用的网络协议，目前支持 http 和 websocket 两种。
+- `engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
-流式TTS的AM 模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
+- 流式TTS的AM 模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
 
 ### 3. 服务端使用方法
 - 命令行 (推荐使用)

From ade75d2e0203ec81cbd654df617705ac57ce67df Mon Sep 17 00:00:00 2001
From: liangym <34430015+lym0302@users.noreply.github.com>
Date: Mon, 25 Apr 2022 14:45:48 +0800
Subject: [PATCH 2/9] Update README_cn.md

---
 demos/streaming_tts_server/README_cn.md | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/demos/streaming_tts_server/README_cn.md b/demos/streaming_tts_server/README_cn.md
index e99d67cf4..a4248afcc 100644
--- a/demos/streaming_tts_server/README_cn.md
+++ b/demos/streaming_tts_server/README_cn.md
@@ -18,9 +18,17 @@
 配置文件可参见 `conf/tts_online_application.yaml` 。
 - `protocol`表示该流式TTS服务使用的网络协议，目前支持 http 和 websocket 两种。
 - `engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
-该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
-目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
+ - 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
+ - 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
 - 流式TTS的AM 模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
+- 流式am推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`am_block`表示chunk中的有效帧数，`am_pad` 表示一个chunk中am_block前后各加的帧数。am_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
+ - fastspeech2不支持流式am推理，am_pad与am_block对它无效
+ - fastspeech2_cnndecoder 支持流式推理，当am_pad=12时，流式推理合成音频与非流式合成音频一致
+- 流式voc推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`voc_block`表示chunk中的有效帧数，`voc_pad` 表示一个chunk中voc_block前后各加的帧数。voc_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
+ - hifigan, mb_melgan 均支持流式voc 推理
+ - 当voc模型为mb_melgan，当voc_pad=14时，流式推理合成音频与非流式合成音频一致；voc_pad最小可以设置为7，合成音频听感上没有异常，若voc_pad小于7，合成音频听感上存在异常。
+ - 当voc模型为hifigan，当voc_pad=20时，流式推理合成音频与非流式合成音频一致；当voc_pad=14时，合成音频听感上没有异常。
+- 推理速度：mb_melgan > hifigan; 音频质量：mb_melgan < hifigan
 
 ### 3. 服务端使用方法
 - 命令行 (推荐使用)

From e96126eda9a2eec46281105bd135ebfeb4b8a6fd Mon Sep 17 00:00:00 2001
From: liangym <34430015+lym0302@users.noreply.github.com>
Date: Mon, 25 Apr 2022 14:46:57 +0800
Subject: [PATCH 3/9] Update README_cn.md

---
 demos/streaming_tts_server/README_cn.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/demos/streaming_tts_server/README_cn.md b/demos/streaming_tts_server/README_cn.md
index a4248afcc..d412f9360 100644
--- a/demos/streaming_tts_server/README_cn.md
+++ b/demos/streaming_tts_server/README_cn.md
@@ -18,16 +18,16 @@
 配置文件可参见 `conf/tts_online_application.yaml` 。
 - `protocol`表示该流式TTS服务使用的网络协议，目前支持 http 和 websocket 两种。
 - `engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
- - 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
- - 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
+ -- 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
+ -- 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
 - 流式TTS的AM 模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
 - 流式am推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`am_block`表示chunk中的有效帧数，`am_pad` 表示一个chunk中am_block前后各加的帧数。am_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
- - fastspeech2不支持流式am推理，am_pad与am_block对它无效
- - fastspeech2_cnndecoder 支持流式推理，当am_pad=12时，流式推理合成音频与非流式合成音频一致
+ -- fastspeech2不支持流式am推理，am_pad与am_block对它无效
+ -- fastspeech2_cnndecoder 支持流式推理，当am_pad=12时，流式推理合成音频与非流式合成音频一致
 - 流式voc推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`voc_block`表示chunk中的有效帧数，`voc_pad` 表示一个chunk中voc_block前后各加的帧数。voc_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
- - hifigan, mb_melgan 均支持流式voc 推理
- - 当voc模型为mb_melgan，当voc_pad=14时，流式推理合成音频与非流式合成音频一致；voc_pad最小可以设置为7，合成音频听感上没有异常，若voc_pad小于7，合成音频听感上存在异常。
- - 当voc模型为hifigan，当voc_pad=20时，流式推理合成音频与非流式合成音频一致；当voc_pad=14时，合成音频听感上没有异常。
+ -- hifigan, mb_melgan 均支持流式voc 推理
+ -- 当voc模型为mb_melgan，当voc_pad=14时，流式推理合成音频与非流式合成音频一致；voc_pad最小可以设置为7，合成音频听感上没有异常，若voc_pad小于7，合成音频听感上存在异常。
+ -- 当voc模型为hifigan，当voc_pad=20时，流式推理合成音频与非流式合成音频一致；当voc_pad=14时，合成音频听感上没有异常。
 - 推理速度：mb_melgan > hifigan; 音频质量：mb_melgan < hifigan
 
 ### 3. 服务端使用方法

From bd76079139375d14745eeb03f6b76315dcbd5751 Mon Sep 17 00:00:00 2001
From: liangym <34430015+lym0302@users.noreply.github.com>
Date: Mon, 25 Apr 2022 14:48:29 +0800
Subject: [PATCH 4/9] Update README_cn.md

---
 demos/streaming_tts_server/README_cn.md | 26 ++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/demos/streaming_tts_server/README_cn.md b/demos/streaming_tts_server/README_cn.md
index d412f9360..c772f49dd 100644
--- a/demos/streaming_tts_server/README_cn.md
+++ b/demos/streaming_tts_server/README_cn.md
@@ -16,19 +16,19 @@
 
 ### 2. 准备配置文件
 配置文件可参见 `conf/tts_online_application.yaml` 。
-- `protocol`表示该流式TTS服务使用的网络协议，目前支持 http 和 websocket 两种。
-- `engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
- -- 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
- -- 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
-- 流式TTS的AM 模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
-- 流式am推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`am_block`表示chunk中的有效帧数，`am_pad` 表示一个chunk中am_block前后各加的帧数。am_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
- -- fastspeech2不支持流式am推理，am_pad与am_block对它无效
- -- fastspeech2_cnndecoder 支持流式推理，当am_pad=12时，流式推理合成音频与非流式合成音频一致
-- 流式voc推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`voc_block`表示chunk中的有效帧数，`voc_pad` 表示一个chunk中voc_block前后各加的帧数。voc_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
- -- hifigan, mb_melgan 均支持流式voc 推理
- -- 当voc模型为mb_melgan，当voc_pad=14时，流式推理合成音频与非流式合成音频一致；voc_pad最小可以设置为7，合成音频听感上没有异常，若voc_pad小于7，合成音频听感上存在异常。
- -- 当voc模型为hifigan，当voc_pad=20时，流式推理合成音频与非流式合成音频一致；当voc_pad=14时，合成音频听感上没有异常。
-- 推理速度：mb_melgan > hifigan; 音频质量：mb_melgan < hifigan
+* `protocol`表示该流式TTS服务使用的网络协议，目前支持 http 和 websocket 两种。
+* `engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
+ ** 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
+ ** 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
+* 流式TTS的AM 模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
+* 流式am推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`am_block`表示chunk中的有效帧数，`am_pad` 表示一个chunk中am_block前后各加的帧数。am_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
+ ** fastspeech2不支持流式am推理，am_pad与am_block对它无效
+ ** fastspeech2_cnndecoder 支持流式推理，当am_pad=12时，流式推理合成音频与非流式合成音频一致
+* 流式voc推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`voc_block`表示chunk中的有效帧数，`voc_pad` 表示一个chunk中voc_block前后各加的帧数。voc_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
+ ** hifigan, mb_melgan 均支持流式voc 推理
+ ** 当voc模型为mb_melgan，当voc_pad=14时，流式推理合成音频与非流式合成音频一致；voc_pad最小可以设置为7，合成音频听感上没有异常，若voc_pad小于7，合成音频听感上存在异常。
+ ** 当voc模型为hifigan，当voc_pad=20时，流式推理合成音频与非流式合成音频一致；当voc_pad=14时，合成音频听感上没有异常。
+* 推理速度：mb_melgan > hifigan; 音频质量：mb_melgan < hifigan
 
 ### 3. 服务端使用方法
 - 命令行 (推荐使用)

From 5681c3edb5c25f7fb90a02bef4b467dee0c39d86 Mon Sep 17 00:00:00 2001
From: liangym <34430015+lym0302@users.noreply.github.com>
Date: Mon, 25 Apr 2022 14:49:17 +0800
Subject: [PATCH 5/9] Update README_cn.md

---
 demos/streaming_tts_server/README_cn.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/demos/streaming_tts_server/README_cn.md b/demos/streaming_tts_server/README_cn.md
index c772f49dd..662ff14e8 100644
--- a/demos/streaming_tts_server/README_cn.md
+++ b/demos/streaming_tts_server/README_cn.md
@@ -18,16 +18,23 @@
 配置文件可参见 `conf/tts_online_application.yaml` 。
 * `protocol`表示该流式TTS服务使用的网络协议，目前支持 http 和 websocket 两种。
 * `engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
+
  ** 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
  ** 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
+
 * 流式TTS的AM 模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
+
 * 流式am推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`am_block`表示chunk中的有效帧数，`am_pad` 表示一个chunk中am_block前后各加的帧数。am_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
+
  ** fastspeech2不支持流式am推理，am_pad与am_block对它无效
  ** fastspeech2_cnndecoder 支持流式推理，当am_pad=12时，流式推理合成音频与非流式合成音频一致
+ 
 * 流式voc推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`voc_block`表示chunk中的有效帧数，`voc_pad` 表示一个chunk中voc_block前后各加的帧数。voc_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
+
  ** hifigan, mb_melgan 均支持流式voc 推理
  ** 当voc模型为mb_melgan，当voc_pad=14时，流式推理合成音频与非流式合成音频一致；voc_pad最小可以设置为7，合成音频听感上没有异常，若voc_pad小于7，合成音频听感上存在异常。
  ** 当voc模型为hifigan，当voc_pad=20时，流式推理合成音频与非流式合成音频一致；当voc_pad=14时，合成音频听感上没有异常。
+ 
 * 推理速度：mb_melgan > hifigan; 音频质量：mb_melgan < hifigan
 
 ### 3. 服务端使用方法

From 429ee6c1031b2ada1ae23275ea22247036801794 Mon Sep 17 00:00:00 2001
From: liangym <34430015+lym0302@users.noreply.github.com>
Date: Mon, 25 Apr 2022 14:49:41 +0800
Subject: [PATCH 6/9] Update README_cn.md

---
 demos/streaming_tts_server/README_cn.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/demos/streaming_tts_server/README_cn.md b/demos/streaming_tts_server/README_cn.md
index 662ff14e8..8c2d6d339 100644
--- a/demos/streaming_tts_server/README_cn.md
+++ b/demos/streaming_tts_server/README_cn.md
@@ -19,8 +19,8 @@
 * `protocol`表示该流式TTS服务使用的网络协议，目前支持 http 和 websocket 两种。
 * `engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
 
- ** 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
- ** 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
+ * 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
+ * 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
 
 * 流式TTS的AM 模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
 

From 3fa01f55453b6b98b77364b32e4677427851276d Mon Sep 17 00:00:00 2001
From: liangym <34430015+lym0302@users.noreply.github.com>
Date: Mon, 25 Apr 2022 14:50:32 +0800
Subject: [PATCH 7/9] Update README_cn.md

---
 demos/streaming_tts_server/README_cn.md | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/demos/streaming_tts_server/README_cn.md b/demos/streaming_tts_server/README_cn.md
index 8c2d6d339..d56a268f3 100644
--- a/demos/streaming_tts_server/README_cn.md
+++ b/demos/streaming_tts_server/README_cn.md
@@ -16,11 +16,10 @@
 
 ### 2. 准备配置文件
 配置文件可参见 `conf/tts_online_application.yaml` 。
-* `protocol`表示该流式TTS服务使用的网络协议，目前支持 http 和 websocket 两种。
-* `engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
-
- * 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
- * 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
+- `protocol`表示该流式TTS服务使用的网络协议，目前支持 http 和 websocket 两种。
+- `engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
+    - 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
+    - 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
 
 * 流式TTS的AM 模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
 

From fef696e7f40390fdf328b928edb02ee0e8f07651 Mon Sep 17 00:00:00 2001
From: liangym <34430015+lym0302@users.noreply.github.com>
Date: Mon, 25 Apr 2022 14:51:37 +0800
Subject: [PATCH 8/9] Update README_cn.md

---
 demos/streaming_tts_server/README_cn.md | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/demos/streaming_tts_server/README_cn.md b/demos/streaming_tts_server/README_cn.md
index d56a268f3..0e20ae700 100644
--- a/demos/streaming_tts_server/README_cn.md
+++ b/demos/streaming_tts_server/README_cn.md
@@ -20,21 +20,15 @@
 - `engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
     - 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
     - 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
-
-* 流式TTS的AM 模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
-
-* 流式am推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`am_block`表示chunk中的有效帧数，`am_pad` 表示一个chunk中am_block前后各加的帧数。am_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
-
- ** fastspeech2不支持流式am推理，am_pad与am_block对它无效
- ** fastspeech2_cnndecoder 支持流式推理，当am_pad=12时，流式推理合成音频与非流式合成音频一致
- 
-* 流式voc推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`voc_block`表示chunk中的有效帧数，`voc_pad` 表示一个chunk中voc_block前后各加的帧数。voc_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
-
- ** hifigan, mb_melgan 均支持流式voc 推理
- ** 当voc模型为mb_melgan，当voc_pad=14时，流式推理合成音频与非流式合成音频一致；voc_pad最小可以设置为7，合成音频听感上没有异常，若voc_pad小于7，合成音频听感上存在异常。
- ** 当voc模型为hifigan，当voc_pad=20时，流式推理合成音频与非流式合成音频一致；当voc_pad=14时，合成音频听感上没有异常。
- 
-* 推理速度：mb_melgan > hifigan; 音频质量：mb_melgan < hifigan
+- 流式TTS的AM 模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
+- 流式am推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`am_block`表示chunk中的有效帧数，`am_pad` 表示一个chunk中am_block前后各加的帧数。am_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
+    - fastspeech2不支持流式am推理，am_pad与am_block对它无效
+    - fastspeech2_cnndecoder 支持流式推理，当am_pad=12时，流式推理合成音频与非流式合成音频一致
+- 流式voc推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`voc_block`表示chunk中的有效帧数，`voc_pad` 表示一个chunk中voc_block前后各加的帧数。voc_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
+    - hifigan, mb_melgan 均支持流式voc 推理
+    - 当voc模型为mb_melgan，当voc_pad=14时，流式推理合成音频与非流式合成音频一致；voc_pad最小可以设置为7，合成音频听感上没有异常，若voc_pad小于7，合成音频听感上存在异常。
+    - 当voc模型为hifigan，当voc_pad=20时，流式推理合成音频与非流式合成音频一致；当voc_pad=14时，合成音频听感上没有异常。
+- 推理速度：mb_melgan > hifigan; 音频质量：mb_melgan < hifigan
 
 ### 3. 服务端使用方法
 - 命令行 (推荐使用)

From 651012616a9bda276040ca308e336094cfa55584 Mon Sep 17 00:00:00 2001
From: lym0302 <lym0302@foxmail.com>
Date: Mon, 25 Apr 2022 15:08:08 +0800
Subject: [PATCH 9/9] add info, test=doc

---
 demos/streaming_tts_server/README.md          | 21 ++++++++++-----
 demos/streaming_tts_server/README_cn.md       | 18 +++++++++----
 .../conf/tts_online_application.yaml          | 25 +++++++++++++----
 .../server/conf/tts_online_application.yaml   | 27 ++++++++++++++-----
 setup.py                                      |  2 --
 5 files changed, 69 insertions(+), 24 deletions(-)

diff --git a/demos/streaming_tts_server/README.md b/demos/streaming_tts_server/README.md
index 801c4f31a..c974cd9d1 100644
--- a/demos/streaming_tts_server/README.md
+++ b/demos/streaming_tts_server/README.md
@@ -15,12 +15,21 @@ You can choose one way from meduim and hard to install paddlespeech.
 
 
 ### 2. Prepare config File
-The configuration file can be found in `conf/tts_online_application.yaml` 。
-Among them, `protocol` indicates the network protocol used by the streaming TTS service. Currently, both http and websocket are supported.
-`engine_list` indicates the speech engine that will be included in the service to be started, in the format of `<speech task>_<engine type>`.
-This demo mainly introduces the streaming speech synthesis service, so the speech task should be set to `tts`.
-Currently, the engine type supports two forms: **online**  and **online-onnx**. `online` indicates an engine that uses python for dynamic graph inference; `online-onnx` indicates an engine that uses onnxruntime for inference. The inference speed of online-onnx is faster.
-Streaming TTS AM model support: **fastspeech2 and fastspeech2_cnndecoder**; Voc model support: **hifigan and mb_melgan**
+The configuration file can be found in `conf/tts_online_application.yaml`.
+- `protocol` indicates the network protocol used by the streaming TTS service. Currently, both http and websocket are supported.
+- `engine_list` indicates the speech engine that will be included in the service to be started, in the format of `<speech task>_<engine type>`.
+    - This demo mainly introduces the streaming speech synthesis service, so the speech task should be set to `tts`.
+    - The engine type supports two forms: **online** and **online-onnx**. `online` indicates an engine that uses python for dynamic graph inference; `online-onnx` indicates an engine that uses onnxruntime for inference. The inference speed of online-onnx is faster.
+- Streaming TTS engine AM model support: **fastspeech2 and fastspeech2_cnndecoder**; Voc model support: **hifigan and mb_melgan**
+- In streaming am inference, one chunk of data is inferred at a time to achieve a streaming effect. Among them, `am_block` indicates the number of valid frames in the chunk, and `am_pad` indicates the number of frames added before and after am_block in a chunk. The existence of am_pad is used to eliminate errors caused by streaming inference and avoid the influence of streaming inference on the quality of synthesized audio.
+    - fastspeech2 does not support streaming am inference, so am_pad and am_block have no effect on it.
+    - fastspeech2_cnndecoder supports streaming inference. When am_pad=12, streaming inference synthesized audio is consistent with non-streaming synthesized audio.
+- In streaming voc inference, one chunk of data is inferred at a time to achieve a streaming effect. Where `voc_block` indicates the number of valid frames in the chunk, and `voc_pad` indicates the number of frames added before and after the voc_block in a chunk. The existence of voc_pad is used to eliminate errors caused by streaming inference and avoid the influence of streaming inference on the quality of synthesized audio.
+    - Both hifigan and mb_melgan support streaming voc inference.
+    - When the voc model is mb_melgan and voc_pad=14, the streaming synthesized audio is consistent with the non-streaming synthesized audio. voc_pad can be set as low as 7 without audible artifacts; if voc_pad is less than 7, the synthesized audio sounds abnormal.
+    - When the voc model is hifigan and voc_pad=20, the streaming synthesized audio is consistent with the non-streaming synthesized audio; when voc_pad=14, the synthesized audio has no audible artifacts.
+- Inference speed: mb_melgan > hifigan; Audio quality: mb_melgan < hifigan
+
 
 
 ### 3. Server Usage
diff --git a/demos/streaming_tts_server/README_cn.md b/demos/streaming_tts_server/README_cn.md
index 211dc3888..01194b2f7 100644
--- a/demos/streaming_tts_server/README_cn.md
+++ b/demos/streaming_tts_server/README_cn.md
@@ -16,11 +16,19 @@
 
 ### 2. 准备配置文件
 配置文件可参见 `conf/tts_online_application.yaml` 。
-其中，`protocol`表示该流式TTS服务使用的网络协议，目前支持 http 和 websocket 两种。
-其中，`engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
-该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
-目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
-流式TTS的AM 模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
+- `protocol`表示该流式TTS服务使用的网络协议，目前支持 http 和 websocket 两种。
+- `engine_list`表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
+    - 该demo主要介绍流式语音合成服务，因此语音任务应设置为tts。
+    - 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用onnxruntime进行推理的引擎。其中，online-onnx的推理速度更快。
+- 流式TTS引擎的AM模型支持：fastspeech2 以及fastspeech2_cnndecoder; Voc 模型支持：hifigan, mb_melgan
+- 流式am推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`am_block`表示chunk中的有效帧数，`am_pad` 表示一个chunk中am_block前后各加的帧数。am_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
+    - fastspeech2不支持流式am推理，因此am_pad与am_block对它无效
+    - fastspeech2_cnndecoder 支持流式推理，当am_pad=12时，流式推理合成音频与非流式合成音频一致
+- 流式voc推理中，每次会对一个chunk的数据进行推理以达到流式的效果。其中`voc_block`表示chunk中的有效帧数，`voc_pad` 表示一个chunk中voc_block前后各加的帧数。voc_pad的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
+    - hifigan, mb_melgan 均支持流式voc 推理
+    - 当voc模型为mb_melgan，当voc_pad=14时，流式推理合成音频与非流式合成音频一致；voc_pad最小可以设置为7，合成音频听感上没有异常，若voc_pad小于7，合成音频听感上存在异常。
+    - 当voc模型为hifigan，当voc_pad=20时，流式推理合成音频与非流式合成音频一致；当voc_pad=14时，合成音频听感上没有异常。
+- 推理速度：mb_melgan > hifigan; 音频质量：mb_melgan < hifigan
 
 ### 3. 服务端使用方法
 - 命令行 (推荐使用)
diff --git a/demos/streaming_tts_server/conf/tts_online_application.yaml b/demos/streaming_tts_server/conf/tts_online_application.yaml
index 353c3e328..67d4641a0 100644
--- a/demos/streaming_tts_server/conf/tts_online_application.yaml
+++ b/demos/streaming_tts_server/conf/tts_online_application.yaml
@@ -1,4 +1,4 @@
-# This is the parameter configuration file for PaddleSpeech Serving.
+# This is the parameter configuration file for streaming tts server.
 
 #################################################################################
 #                             SERVER SETTING                                    #
@@ -7,8 +7,8 @@ host: 127.0.0.1
 port: 8092
 
 # The task format in the engin_list is: <speech task>_<engine type>
-# engine_list choices = ['tts_online', 'tts_online-onnx']
-# protocol = ['websocket', 'http'] (only one can be selected).
+# engine_list choices = ['tts_online', 'tts_online-onnx'], the inference speed of tts_online-onnx is faster than tts_online.
+# protocol choices = ['websocket', 'http'] 
 protocol: 'http'
 engine_list: ['tts_online-onnx']
 
@@ -20,7 +20,8 @@ engine_list: ['tts_online-onnx']
 ################################### TTS #########################################
 ################### speech task: tts; engine_type: online #######################
 tts_online: 
-    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']        
+    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']   
+    # fastspeech2_cnndecoder_csmsc supports streaming am inference.
     am: 'fastspeech2_csmsc'   
     am_config: 
     am_ckpt: 
@@ -31,6 +32,7 @@ tts_online:
     spk_id: 0
 
     # voc (vocoder) choices=['mb_melgan_csmsc, hifigan_csmsc']
+    # Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference
     voc: 'mb_melgan_csmsc'
     voc_config: 
     voc_ckpt: 
@@ -39,8 +41,13 @@ tts_online:
     # others
     lang: 'zh'
     device: 'cpu' # set 'gpu:id' or 'cpu'
+    # am_block and am_pad only take effect when the fastspeech2_cnndecoder_csmsc model is used for streaming am inference;
+    # when am_pad is set to 12, streaming synthetic audio is the same as non-streaming synthetic audio
     am_block: 42
     am_pad: 12
+    # voc_block and voc_pad are used by the voc model for streaming voc inference.
+    # When the voc model is mb_melgan_csmsc and voc_pad is set to 14, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad can be set as low as 7 and the streaming synthetic audio still sounds normal.
+    # When the voc model is hifigan_csmsc and voc_pad is set to 20, streaming synthetic audio is the same as non-streaming synthetic audio; with voc_pad set to 14, the streaming synthetic audio still sounds normal.
     voc_block: 14
     voc_pad: 14
     
@@ -53,7 +60,8 @@ tts_online:
 ################################### TTS #########################################
 ################### speech task: tts; engine_type: online-onnx #######################
 tts_online-onnx: 
-    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']        
+    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
+    # fastspeech2_cnndecoder_csmsc_onnx supports streaming am inference.
     am: 'fastspeech2_cnndecoder_csmsc_onnx' 
     # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
     # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
@@ -70,6 +78,7 @@ tts_online-onnx:
         cpu_threads: 4
 
     # voc (vocoder) choices=['mb_melgan_csmsc_onnx, hifigan_csmsc_onnx']
+    # Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference
     voc: 'hifigan_csmsc_onnx'
     voc_ckpt: 
     voc_sample_rate: 24000
@@ -80,9 +89,15 @@ tts_online-onnx:
 
     # others
     lang: 'zh'
+    # am_block and am_pad only take effect when the fastspeech2_cnndecoder_csmsc_onnx model is used for streaming am inference;
+    # when am_pad is set to 12, streaming synthetic audio is the same as non-streaming synthetic audio
     am_block: 42
     am_pad: 12
+    # voc_block and voc_pad are used by the voc model for streaming voc inference.
+    # When the voc model is mb_melgan_csmsc_onnx and voc_pad is set to 14, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad can be set as low as 7 and the streaming synthetic audio still sounds normal.
+    # When the voc model is hifigan_csmsc_onnx and voc_pad is set to 20, streaming synthetic audio is the same as non-streaming synthetic audio; with voc_pad set to 14, the streaming synthetic audio still sounds normal.
     voc_block: 14
     voc_pad: 14
+    # voc_upsample should be the same as n_shift in the voc config.
     voc_upsample: 300
     
diff --git a/paddlespeech/server/conf/tts_online_application.yaml b/paddlespeech/server/conf/tts_online_application.yaml
index 6214188d7..67d4641a0 100644
--- a/paddlespeech/server/conf/tts_online_application.yaml
+++ b/paddlespeech/server/conf/tts_online_application.yaml
@@ -1,4 +1,4 @@
-# This is the parameter configuration file for PaddleSpeech Serving.
+# This is the parameter configuration file for streaming tts server.
 
 #################################################################################
 #                             SERVER SETTING                                    #
@@ -7,8 +7,8 @@ host: 127.0.0.1
 port: 8092
 
 # The task format in the engin_list is: <speech task>_<engine type>
-# task choices = ['tts_online', 'tts_online-onnx']
-# protocol = ['websocket', 'http'] (only one can be selected).
+# engine_list choices = ['tts_online', 'tts_online-onnx'], the inference speed of tts_online-onnx is faster than tts_online.
+# protocol choices = ['websocket', 'http'] 
 protocol: 'http'
 engine_list: ['tts_online-onnx']
 
@@ -20,8 +20,9 @@ engine_list: ['tts_online-onnx']
 ################################### TTS #########################################
 ################### speech task: tts; engine_type: online #######################
 tts_online: 
-    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']        
-    am: 'fastspeech2_cnndecoder_csmsc'   
+    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']   
+    # fastspeech2_cnndecoder_csmsc supports streaming am inference.
+    am: 'fastspeech2_csmsc'   
     am_config: 
     am_ckpt: 
     am_stat: 
@@ -31,6 +32,7 @@ tts_online:
     spk_id: 0
 
     # voc (vocoder) choices=['mb_melgan_csmsc, hifigan_csmsc']
+    # Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference
     voc: 'mb_melgan_csmsc'
     voc_config: 
     voc_ckpt: 
@@ -39,8 +41,13 @@ tts_online:
     # others
     lang: 'zh'
     device: 'cpu' # set 'gpu:id' or 'cpu'
+    # am_block and am_pad only take effect when the fastspeech2_cnndecoder_csmsc model is used for streaming am inference;
+    # when am_pad is set to 12, streaming synthetic audio is the same as non-streaming synthetic audio
     am_block: 42
     am_pad: 12
+    # voc_block and voc_pad are used by the voc model for streaming voc inference.
+    # When the voc model is mb_melgan_csmsc and voc_pad is set to 14, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad can be set as low as 7 and the streaming synthetic audio still sounds normal.
+    # When the voc model is hifigan_csmsc and voc_pad is set to 20, streaming synthetic audio is the same as non-streaming synthetic audio; with voc_pad set to 14, the streaming synthetic audio still sounds normal.
     voc_block: 14
     voc_pad: 14
     
@@ -53,7 +60,8 @@ tts_online:
 ################################### TTS #########################################
 ################### speech task: tts; engine_type: online-onnx #######################
 tts_online-onnx: 
-    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']        
+    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
+    # fastspeech2_cnndecoder_csmsc_onnx supports streaming am inference.
     am: 'fastspeech2_cnndecoder_csmsc_onnx' 
     # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
     # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
@@ -70,6 +78,7 @@ tts_online-onnx:
         cpu_threads: 4
 
     # voc (vocoder) choices=['mb_melgan_csmsc_onnx, hifigan_csmsc_onnx']
+    # Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference
     voc: 'hifigan_csmsc_onnx'
     voc_ckpt: 
     voc_sample_rate: 24000
@@ -80,9 +89,15 @@ tts_online-onnx:
 
     # others
     lang: 'zh'
+    # am_block and am_pad only take effect when the fastspeech2_cnndecoder_csmsc_onnx model is used for streaming am inference;
+    # when am_pad is set to 12, streaming synthetic audio is the same as non-streaming synthetic audio
     am_block: 42
     am_pad: 12
+    # voc_block and voc_pad are used by the voc model for streaming voc inference.
+    # When the voc model is mb_melgan_csmsc_onnx and voc_pad is set to 14, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad can be set as low as 7 and the streaming synthetic audio still sounds normal.
+    # When the voc model is hifigan_csmsc_onnx and voc_pad is set to 20, streaming synthetic audio is the same as non-streaming synthetic audio; with voc_pad set to 14, the streaming synthetic audio still sounds normal.
     voc_block: 14
     voc_pad: 14
+    # voc_upsample should be the same as n_shift in the voc config.
     voc_upsample: 300
     
diff --git a/setup.py b/setup.py
index 34c0baa3d..912fdd6d1 100644
--- a/setup.py
+++ b/setup.py
@@ -73,8 +73,6 @@ server = [
     "uvicorn",
     "pattern_singleton",
     "websockets",
-    "websocket",
-    "websocket-client",
 ]
 
 requirements = {