From caaa5cd50202a37a81133bd015d8c07a91456041 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Tue, 12 Jul 2022 08:24:39 +0000
Subject: [PATCH 1/2] more cli for speech demos

---
 demos/custom_streaming_asr/setup_docker.sh     |  0
 demos/keyword_spotting/run.sh                  |  0
 demos/speaker_verification/run.sh              |  0
 demos/speech_recognition/run.sh                | 18 +++++++++++++++++-
 demos/speech_server/asr_client.sh              |  0
 demos/speech_server/cls_client.sh              |  0
 demos/speech_server/server.sh                  |  2 +-
 demos/speech_server/sid_client.sh              | 10 ++++++++++
 demos/speech_server/text_client.sh             |  4 ++++
 demos/speech_server/tts_client.sh              |  0
 .../{ => local}/punc_server.py                 |  0
 .../{ => local}/streaming_asr_server.py        |  0
 demos/streaming_asr_server/run.sh              |  0
 demos/streaming_asr_server/server.sh           |  7 +++----
 demos/streaming_asr_server/test.sh             |  2 +-
 demos/streaming_tts_server/start_server.sh     |  3 ---
 demos/streaming_tts_server/test_client.sh      |  9 ---------
 demos/text_to_speech/run.sh                    |  8 +++++++-
 paddlespeech/server/bin/paddlespeech_client.py |  1 +
 19 files changed, 44 insertions(+), 20 deletions(-)
 mode change 100644 => 100755 demos/custom_streaming_asr/setup_docker.sh
 mode change 100644 => 100755 demos/keyword_spotting/run.sh
 mode change 100644 => 100755 demos/speaker_verification/run.sh
 mode change 100644 => 100755 demos/speech_recognition/run.sh
 mode change 100644 => 100755 demos/speech_server/asr_client.sh
 mode change 100644 => 100755 demos/speech_server/cls_client.sh
 mode change 100644 => 100755 demos/speech_server/server.sh
 create mode 100755 demos/speech_server/sid_client.sh
 create mode 100755 demos/speech_server/text_client.sh
 mode change 100644 => 100755 demos/speech_server/tts_client.sh
 rename demos/streaming_asr_server/{ => local}/punc_server.py (100%)
 rename demos/streaming_asr_server/{ => local}/streaming_asr_server.py (100%)
 mode change 100644 => 100755 demos/streaming_asr_server/run.sh
 delete mode 100644 demos/streaming_tts_server/start_server.sh
 delete mode 100644 demos/streaming_tts_server/test_client.sh

diff --git a/demos/custom_streaming_asr/setup_docker.sh b/demos/custom_streaming_asr/setup_docker.sh
old mode 100644
new mode 100755
diff --git a/demos/keyword_spotting/run.sh b/demos/keyword_spotting/run.sh
old mode 100644
new mode 100755
diff --git a/demos/speaker_verification/run.sh b/demos/speaker_verification/run.sh
old mode 100644
new mode 100755
diff --git a/demos/speech_recognition/run.sh b/demos/speech_recognition/run.sh
old mode 100644
new mode 100755
index 19ce0ebb3..e48ff3e96
--- a/demos/speech_recognition/run.sh
+++ b/demos/speech_recognition/run.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
-wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
 
 # asr
 paddlespeech asr --input ./zh.wav
@@ -8,3 +9,18 @@ paddlespeech asr --input ./zh.wav
 
 # asr + punc
 paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
+
+
+# asr help
+paddlespeech asr --help
+
+
+# english asr
+paddlespeech asr --lang en --model transformer_librispeech --input ./en.wav
+
+# model stats
+paddlespeech stats --task asr
+
+
+# paddlespeech help
+paddlespeech --help
diff --git a/demos/speech_server/asr_client.sh b/demos/speech_server/asr_client.sh
old mode 100644
new mode 100755
diff --git a/demos/speech_server/cls_client.sh b/demos/speech_server/cls_client.sh
old mode 100644
new mode 100755
diff --git a/demos/speech_server/server.sh b/demos/speech_server/server.sh
old mode 100644
new mode 100755
index e5961286b..fd719ffc1
--- a/demos/speech_server/server.sh
+++ b/demos/speech_server/server.sh
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-paddlespeech_server start --config_file ./conf/application.yaml
+paddlespeech_server start --config_file ./conf/application.yaml &> server.log &
diff --git a/demos/speech_server/sid_client.sh b/demos/speech_server/sid_client.sh
new file mode 100755
index 000000000..99bab21ae
--- /dev/null
+++ b/demos/speech_server/sid_client.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
+wget -c https://paddlespeech.bj.bcebos.com/vector/audio/123456789.wav
+
+# sid extract
+paddlespeech_client vector --server_ip 127.0.0.1 --port 8090 --task spk --input ./85236145389.wav
+
+# sid score
+paddlespeech_client vector --server_ip 127.0.0.1 --port 8090 --task score --enroll ./85236145389.wav --test ./123456789.wav
diff --git a/demos/speech_server/text_client.sh b/demos/speech_server/text_client.sh
new file mode 100755
index 000000000..098f159fb
--- /dev/null
+++ b/demos/speech_server/text_client.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+
+paddlespeech_client text --server_ip 127.0.0.1 --port 8090 --input 今天的天气真好啊你下午有空吗我想约你一起去吃饭
diff --git a/demos/speech_server/tts_client.sh b/demos/speech_server/tts_client.sh
old mode 100644
new mode 100755
diff --git a/demos/streaming_asr_server/punc_server.py b/demos/streaming_asr_server/local/punc_server.py
similarity index 100%
rename from demos/streaming_asr_server/punc_server.py
rename to demos/streaming_asr_server/local/punc_server.py
diff --git a/demos/streaming_asr_server/streaming_asr_server.py b/demos/streaming_asr_server/local/streaming_asr_server.py
similarity index 100%
rename from demos/streaming_asr_server/streaming_asr_server.py
rename to demos/streaming_asr_server/local/streaming_asr_server.py
diff --git a/demos/streaming_asr_server/run.sh b/demos/streaming_asr_server/run.sh
old mode 100644
new mode 100755
diff --git a/demos/streaming_asr_server/server.sh b/demos/streaming_asr_server/server.sh
index f532546e7..961cb046a 100755
--- a/demos/streaming_asr_server/server.sh
+++ b/demos/streaming_asr_server/server.sh
@@ -1,9 +1,8 @@
-export CUDA_VISIBLE_DEVICE=0,1,2,3
- export CUDA_VISIBLE_DEVICE=0,1,2,3
+#export CUDA_VISIBLE_DEVICES=0,1,2,3
 
-# nohup python3 punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
+# nohup python3 local/punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
 paddlespeech_server start --config_file conf/punc_application.yaml &> punc.log &
 
-# nohup python3 streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 &
+# nohup python3 local/streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 &
 paddlespeech_server start --config_file conf/ws_conformer_wenetspeech_application.yaml &> streaming_asr.log  &
 
diff --git a/demos/streaming_asr_server/test.sh b/demos/streaming_asr_server/test.sh
index 67a5ec4c5..386c7f894 100755
--- a/demos/streaming_asr_server/test.sh
+++ b/demos/streaming_asr_server/test.sh
@@ -7,5 +7,5 @@ paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wa
 
 # read the wav and call streaming and punc service
 # If `127.0.0.1` is not accessible, you need to use the actual service IP address.
-paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8290 --punc.server_ip 127.0.0.1 --punc.port 8190 --input ./zh.wav
+paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --punc.server_ip 127.0.0.1 --punc.port 8190 --input ./zh.wav
 
diff --git a/demos/streaming_tts_server/start_server.sh b/demos/streaming_tts_server/start_server.sh
deleted file mode 100644
index 9c71f2fe2..000000000
--- a/demos/streaming_tts_server/start_server.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-# start server
-paddlespeech_server start --config_file ./conf/tts_online_application.yaml
\ No newline at end of file
diff --git a/demos/streaming_tts_server/test_client.sh b/demos/streaming_tts_server/test_client.sh
deleted file mode 100644
index bd88f20b1..000000000
--- a/demos/streaming_tts_server/test_client.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-# http client test
-# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
-paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.wav
-
-# websocket client test
-# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
-# paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol websocket --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.wav
diff --git a/demos/text_to_speech/run.sh b/demos/text_to_speech/run.sh
index b1340241b..2b588be55 100755
--- a/demos/text_to_speech/run.sh
+++ b/demos/text_to_speech/run.sh
@@ -4,4 +4,10 @@
 paddlespeech tts --input 今天的天气不错啊
 
 # Batch process
-echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
\ No newline at end of file
+echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+
+# Text Frontend
+paddlespeech tts --input 今天是2022/10/29,最低温度是-3℃.
+
+
+
diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py
index e8e57fff0..96368c0f3 100644
--- a/paddlespeech/server/bin/paddlespeech_client.py
+++ b/paddlespeech/server/bin/paddlespeech_client.py
@@ -718,6 +718,7 @@ class VectorClientExecutor(BaseExecutor):
             logger.info(f"the input audio: {input}")
             handler = VectorHttpHandler(server_ip=server_ip, port=port)
             res = handler.run(input, audio_format, sample_rate)
+            logger.info(f"The spk embedding is: {res}")
             return res
         elif task == "score":
             from paddlespeech.server.utils.audio_handler import VectorScoreHttpHandler

From 74245cc115122e2f1774d720be3c068ee4b52525 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Tue, 12 Jul 2022 09:40:48 +0000
Subject: [PATCH 2/2] add streaming tts scripts

---
 demos/streaming_tts_server/client.sh          |   9 ++
 .../conf/tts_online_ws_application.yaml       | 103 ++++++++++++++++++
 demos/streaming_tts_server/server.sh          |  10 ++
 3 files changed, 122 insertions(+)
 create mode 100755 demos/streaming_tts_server/client.sh
 create mode 100644 demos/streaming_tts_server/conf/tts_online_ws_application.yaml
 create mode 100755 demos/streaming_tts_server/server.sh

diff --git a/demos/streaming_tts_server/client.sh b/demos/streaming_tts_server/client.sh
new file mode 100755
index 000000000..e93da58a8
--- /dev/null
+++ b/demos/streaming_tts_server/client.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# http client test
+# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
+paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.http.wav
+
+# websocket client test
+# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
+paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8192 --protocol websocket --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.ws.wav
diff --git a/demos/streaming_tts_server/conf/tts_online_ws_application.yaml b/demos/streaming_tts_server/conf/tts_online_ws_application.yaml
new file mode 100644
index 000000000..146f06f15
--- /dev/null
+++ b/demos/streaming_tts_server/conf/tts_online_ws_application.yaml
@@ -0,0 +1,103 @@
+# This is the parameter configuration file for streaming tts server.
+
+#################################################################################
+#                             SERVER SETTING                                    #
+#################################################################################
+host: 0.0.0.0
+port: 8192
+
+# The task format in the engine_list is: <speech task>_<engine type>
+# engine_list choices = ['tts_online', 'tts_online-onnx'], the inference speed of tts_online-onnx is faster than tts_online.
+# protocol choices = ['websocket', 'http'] 
+protocol: 'websocket'
+engine_list: ['tts_online-onnx']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### TTS #########################################
+################### speech task: tts; engine_type: online #######################
+tts_online: 
+    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']   
+    # fastspeech2_cnndecoder_csmsc support streaming am infer.     
+    am: 'fastspeech2_csmsc'   
+    am_config: 
+    am_ckpt: 
+    am_stat: 
+    phones_dict: 
+    tones_dict: 
+    speaker_dict: 
+    spk_id: 0
+
+    # voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
+    # Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference
+    voc: 'mb_melgan_csmsc'
+    voc_config: 
+    voc_ckpt: 
+    voc_stat: 
+
+    # others
+    lang: 'zh'
+    device: 'cpu' # set 'gpu:id' or 'cpu'
+    # am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
+    # when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
+    am_block: 72
+    am_pad: 12
+    # voc_block and voc_pad are used by the voc model for streaming voc inference,
+    # when voc model is mb_melgan_csmsc, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
+    # when voc model is hifigan_csmsc, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
+    voc_block: 36
+    voc_pad: 14
+    
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### TTS #########################################
+################### speech task: tts; engine_type: online-onnx #######################
+tts_online-onnx: 
+    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
+    # fastspeech2_cnndecoder_csmsc_onnx support streaming am infer.        
+    am: 'fastspeech2_cnndecoder_csmsc_onnx' 
+    # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
+    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
+    am_ckpt:   # list
+    am_stat: 
+    phones_dict: 
+    tones_dict: 
+    speaker_dict: 
+    spk_id: 0
+    am_sample_rate: 24000
+    am_sess_conf:
+        device: "cpu" # set 'gpu:id' or 'cpu'
+        use_trt: False
+        cpu_threads: 4
+
+    # voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
+    # Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference
+    voc: 'hifigan_csmsc_onnx'
+    voc_ckpt: 
+    voc_sample_rate: 24000
+    voc_sess_conf:
+        device: "cpu" # set 'gpu:id' or 'cpu'
+        use_trt: False
+        cpu_threads: 4
+
+    # others
+    lang: 'zh'
+    # am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
+    # when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
+    am_block: 72
+    am_pad: 12
+    # voc_block and voc_pad are used by the voc model for streaming voc inference,
+    # when voc model is mb_melgan_csmsc_onnx, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
+    # when voc model is hifigan_csmsc_onnx, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
+    voc_block: 36
+    voc_pad: 14
+    # voc_upsample should be same as n_shift on voc config.
+    voc_upsample: 300
+    
diff --git a/demos/streaming_tts_server/server.sh b/demos/streaming_tts_server/server.sh
new file mode 100755
index 000000000..d34ddba02
--- /dev/null
+++ b/demos/streaming_tts_server/server.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# http server
+paddlespeech_server start --config_file ./conf/tts_online_application.yaml &> tts.http.log &
+
+
+# websocket server
+paddlespeech_server start --config_file ./conf/tts_online_ws_application.yaml &> tts.ws.log &
+
+