From 7e88f2bf11698b5a13782c4f771776fe31ca0dd7 Mon Sep 17 00:00:00 2001
From: xiongxinlei
Date: Wed, 27 Apr 2022 12:22:33 +0800
Subject: [PATCH 1/3] update streaming asr readme, test=doc

---
 demos/streaming_asr_server/README.md       | 10 +++--
 demos/streaming_asr_server/README_cn.md    | 14 +++---
 .../conf/application.yaml                  | 45 +++++++++++++++++++
 .../conf/ws_application.yaml               |  4 +-
 .../conf/ws_conformer_application.yaml     |  4 +-
 5 files changed, 63 insertions(+), 14 deletions(-)
 create mode 100644 demos/streaming_asr_server/conf/application.yaml

diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md
index 6a2f21aa..83b8e05c 100644
--- a/demos/streaming_asr_server/README.md
+++ b/demos/streaming_asr_server/README.md
@@ -5,6 +5,7 @@
 ## Introduction
 This demo is an implementation of starting the streaming speech service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client` or a few lines of code in python.
+Streaming ASR server only supports `websocket` protocol, and doesn't support `http` protocol.
 
 ## Usage
 ### 1. Installation
@@ -114,7 +115,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
     server_executor = ServerExecutor()
     server_executor(
-        config_file="./conf/ws_conformer_application.yaml",
+        config_file="./conf/ws_conformer_application.yaml",
         log_file="./log/paddlespeech.log")
     ```
@@ -188,7 +189,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
   **Note:** The response time will be slightly longer when using the client for the first time
   - Command Line (Recommended)
     ```
-    paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
+    paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav --protocol websocket
     ```
 
     Usage:
@@ -284,8 +285,9 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
         port=8090,
         sample_rate=16000,
         lang="zh_cn",
-        audio_format="wav")
-    print(res.json())
+        audio_format="wav",
+        protocol="websocket")
+    print(res)
     ```
 
     Output:

diff --git a/demos/streaming_asr_server/README_cn.md b/demos/streaming_asr_server/README_cn.md
index 9224206b..9e5473fe 100644
--- a/demos/streaming_asr_server/README_cn.md
+++ b/demos/streaming_asr_server/README_cn.md
@@ -5,13 +5,14 @@
 ## 介绍
 这个demo是一个启动流式语音服务和访问服务的实现。 它可以通过使用`paddlespeech_server` 和 `paddlespeech_client`的单个命令或 python 的几行代码来实现。
+流式语音识别服务只支持 `websocket` 协议,不支持 `http` 协议。
 
 ## 使用方法
 ### 1. 安装
 请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
 
 推荐使用 **paddlepaddle 2.2.1** 或以上版本。
-你可以从 medium,hard 三中方式中选择一种方式安装 PaddleSpeech。
+你可以从 medium,hard 两种方式中选择一种方式安装 PaddleSpeech。
 
 ### 2. 准备配置文件
@@ -187,7 +188,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
   **注意:** 初次使用客户端时响应时间会略长
   - 命令行 (推荐使用)
     ```
-    paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
+    paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav --protocol websocket
     ```
 
@@ -275,18 +276,19 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
   - Python API
     ```python
-    from paddlespeech.server.bin.paddlespeech_client import ASROnlineClientExecutor
+    from paddlespeech.server.bin.paddlespeech_client import ASRClientExecutor
     import json
 
-    asrclient_executor = ASROnlineClientExecutor()
+    asrclient_executor = ASRClientExecutor()
     res = asrclient_executor(
         input="./zh.wav",
         server_ip="127.0.0.1",
         port=8090,
         sample_rate=16000,
         lang="zh_cn",
-        audio_format="wav")
-    print(res.json())
+        audio_format="wav",
+        protocol="websocket")
+    print(res)
     ```
 
     输出:

diff --git a/demos/streaming_asr_server/conf/application.yaml b/demos/streaming_asr_server/conf/application.yaml
new file mode 100644
index 00000000..50c7a727
--- /dev/null
+++ b/demos/streaming_asr_server/conf/application.yaml
@@ -0,0 +1,45 @@
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+#                               SERVER SETTING                                  #
+#################################################################################
+host: 0.0.0.0
+port: 8090
+
+# The task format in the engin_list is: <speech task>_<engine type>
+# task choices = ['asr_online']
+# protocol = ['websocket'] (only one can be selected).
+# websocket only support online engine type.
+protocol: 'websocket'
+engine_list: ['asr_online']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### ASR #########################################
+################### speech task: asr; engine_type: online #######################
+asr_online:
+    model_type: 'conformer_online_multicn'
+    am_model:   # the pdmodel file of am static model [optional]
+    am_params:  # the pdiparams file of am static model [optional]
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path:
+    decode_method:
+    force_yes: True
+    device:     # cpu or gpu:id
+    am_predictor_conf:
+        device:     # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False    # True -> print glog
+        summary: True       # False -> do not show predictor config
+
+    chunk_buffer_conf:
+        window_n: 7     # frame
+        shift_n: 4      # frame
+        window_ms: 25   # ms
+        shift_ms: 10    # ms
+        sample_rate: 16000
+        sample_width: 2
\ No newline at end of file

diff --git a/demos/streaming_asr_server/conf/ws_application.yaml b/demos/streaming_asr_server/conf/ws_application.yaml
index dee8d78b..fc02f2ca 100644
--- a/demos/streaming_asr_server/conf/ws_application.yaml
+++ b/demos/streaming_asr_server/conf/ws_application.yaml
@@ -7,8 +7,8 @@ host: 0.0.0.0
 port: 8090
 
 # The task format in the engin_list is: <speech task>_<engine type>
-# task choices = ['asr_online', 'tts_online']
-# protocol = ['websocket', 'http'] (only one can be selected).
+# task choices = ['asr_online']
+# protocol = ['websocket'] (only one can be selected).
 # websocket only support online engine type.
 protocol: 'websocket'
 engine_list: ['asr_online']

diff --git a/demos/streaming_asr_server/conf/ws_conformer_application.yaml b/demos/streaming_asr_server/conf/ws_conformer_application.yaml
index 8f011485..50c7a727 100644
--- a/demos/streaming_asr_server/conf/ws_conformer_application.yaml
+++ b/demos/streaming_asr_server/conf/ws_conformer_application.yaml
@@ -7,8 +7,8 @@ host: 0.0.0.0
 port: 8090
 
 # The task format in the engin_list is: <speech task>_<engine type>
-# task choices = ['asr_online', 'tts_online']
-# protocol = ['websocket', 'http'] (only one can be selected).
+# task choices = ['asr_online']
+# protocol = ['websocket'] (only one can be selected).
 # websocket only support online engine type.
 protocol: 'websocket'
 engine_list: ['asr_online']

From cb9beabacedb2ae1f2cad6fbc7d0005f93eabe6e Mon Sep 17 00:00:00 2001
From: xiongxinlei
Date: Wed, 27 Apr 2022 13:13:05 +0800
Subject: [PATCH 2/3] fix the sv ecapa-tdnn cpu training, test=doc

---
 examples/voxceleb/sv0/local/train.sh | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/examples/voxceleb/sv0/local/train.sh b/examples/voxceleb/sv0/local/train.sh
index 5477d0a3..674fedb3 100755
--- a/examples/voxceleb/sv0/local/train.sh
+++ b/examples/voxceleb/sv0/local/train.sh
@@ -42,15 +42,25 @@ device="cpu"
 if ${use_gpu}; then
     device="gpu"
 fi
+if [ $ngpu -le 0 ]; then
+    echo "no gpu, training in cpu mode"
+    device='cpu'
+    use_gpu=false
+fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train the speaker identification task with voxceleb data
     # and we will create the trained model parameters in ${exp_dir}/model.pdparams as the soft link
     # Note: we will store the log file in exp/log directory
-    python3 -m paddle.distributed.launch --gpus=$CUDA_VISIBLE_DEVICES \
-        ${BIN_DIR}/train.py --device ${device} --checkpoint-dir ${exp_dir} \
-        --data-dir ${dir} --config ${conf_path}
-
+    if $use_gpu; then
+        python3 -m paddle.distributed.launch --gpus=$CUDA_VISIBLE_DEVICES \
+            ${BIN_DIR}/train.py --device ${device} --checkpoint-dir ${exp_dir} \
+            --data-dir ${dir} --config ${conf_path}
+    else
+        python3 \
+            ${BIN_DIR}/train.py --device ${device} --checkpoint-dir ${exp_dir} \
+            --data-dir ${dir} --config ${conf_path}
+    fi
 fi
 
 if [ $? -ne 0 ]; then

From 4c56e4d42cd7cfd991f94aedc712a2ae34bf8250 Mon Sep 17 00:00:00 2001
From: xiongxinlei
Date: Wed, 27 Apr 2022 15:59:29 +0800
Subject: [PATCH 3/3] update the voxceleb readme.md, test=doc

---
 examples/voxceleb/sv0/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/voxceleb/sv0/README.md b/examples/voxceleb/sv0/README.md
index 567963e5..1069cfe7 100644
--- a/examples/voxceleb/sv0/README.md
+++ b/examples/voxceleb/sv0/README.md
@@ -142,7 +142,7 @@ using the `tar` scripts to unpack the model and then you can use the script to t
 For example:
 ```
 wget https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz
-tar xzvf sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz
+tar -xvf sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz
 source path.sh
 # If you have processed the data and get the manifest file, you can skip the following 2 steps
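
For reference, the sketch below works through the `chunk_buffer_conf` values in the new `conf/application.yaml` added by PATCH 1/3. The numeric values (16 kHz, 2-byte samples, 25 ms windows, 10 ms shifts, 7/4 frames) are copied from that file; the frame-to-chunk interpretation is an assumption made for illustration only and is not stated in the patch or verified against the PaddleSpeech sources.

```python
# Back-of-the-envelope numbers for chunk_buffer_conf in conf/application.yaml.
# Values are taken from the config; the chunk interpretation is an assumption.
sample_rate = 16000            # Hz, as in the config
sample_width = 2               # bytes per sample (16-bit PCM)
window_ms, shift_ms = 25, 10   # per-frame window length and hop, in ms
window_n, shift_n = 7, 4       # frames per decoding chunk / frames advanced per chunk


def ms_to_bytes(ms: int) -> int:
    """Bytes of raw 16 kHz / 16-bit mono audio covering `ms` milliseconds."""
    return int(sample_rate * ms / 1000) * sample_width


print(ms_to_bytes(window_ms))   # 800  bytes per 25 ms analysis frame
print(ms_to_bytes(shift_ms))    # 320  bytes per 10 ms hop

# Assuming a decoding chunk spans window_n frames that hop by shift_ms:
chunk_ms = window_ms + (window_n - 1) * shift_ms   # 25 + 6 * 10 = 85 ms
hop_ms = shift_n * shift_ms                        # 4 * 10 = 40 ms between chunks
print(chunk_ms, ms_to_bytes(chunk_ms))             # 85 2720
print(hop_ms, ms_to_bytes(hop_ms))                 # 40 1280
```

Under that reading, each decoding chunk the websocket server buffers covers roughly 85 ms of audio (2720 bytes) and advances by about 40 ms (1280 bytes) per step; treat these figures as a rough guide only.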