From 54341c88a6e5d7595d20bfbb3a21cd84ecdaebfc Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Mon, 28 Feb 2022 10:39:19 +0000
Subject: [PATCH 1/3] cli batch and shell pipe, test=doc

---
 README.md                             | 15 +++++++++++++--
 README_cn.md                          | 11 +++++++++++
 demos/speech_recognition/.gitignore   |  1 +
 demos/speech_recognition/README.md    |  2 ++
 demos/speech_recognition/README_cn.md |  2 ++
 demos/speech_recognition/run.sh       |  6 ++++++
 demos/text_to_speech/README.md        |  5 ++++-
 demos/text_to_speech/README_cn.md     |  4 ++++
 demos/text_to_speech/run.sh           |  4 ++++
 9 files changed, 47 insertions(+), 3 deletions(-)
 create mode 100644 demos/speech_recognition/.gitignore

diff --git a/README.md b/README.md
index 46730797..a142cb5e 100644
--- a/README.md
+++ b/README.md
@@ -196,16 +196,18 @@ Developers can have a try of our models with [PaddleSpeech Command Line](./paddl
 ```shell
 paddlespeech cls --input input.wav
 ```
+
 **Automatic Speech Recognition**
 ```shell
 paddlespeech asr --lang zh --input input_16k.wav
 ```
-**Speech Translation** (English to Chinese)
 
+**Speech Translation** (English to Chinese)
 (not support for Mac and Windows now)
 ```shell
 paddlespeech st --input input_16k.wav
 ```
+
 **Text-to-Speech** 
 ```shell
 paddlespeech tts --input "你好，欢迎使用飞桨深度学习框架！" --output output.wav
@@ -218,7 +220,16 @@ paddlespeech tts --input "你好，欢迎使用飞桨深度学习框架！" --ou
   paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
   ```
 
-  
+**Batch Process**
+```shell
+echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+```
+
+**Shell Pipeline**
+ASR + Punc:
+```shell
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
+```
 
 For more command lines, please see: [demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos)
 
diff --git a/README_cn.md b/README_cn.md
index 9782240a..366d9a02 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -216,6 +216,17 @@ paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！
    paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
    ```
 
+**批处理**
+```shell
+echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+```
+
+**Shell管道**
+ASR + Punc:
+```shell
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
+```
+
 更多命令行命令请参考 [demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos)
 > Note: 如果需要训练或者微调，请查看[语音识别](./docs/source/asr/quick_start.md)， [语音合成](./docs/source/tts/quick_start.md)。
 
diff --git a/demos/speech_recognition/.gitignore b/demos/speech_recognition/.gitignore
new file mode 100644
index 00000000..d8dd7532
--- /dev/null
+++ b/demos/speech_recognition/.gitignore
@@ -0,0 +1 @@
+*.wav
diff --git a/demos/speech_recognition/README.md b/demos/speech_recognition/README.md
index c49afa35..5d964fce 100644
--- a/demos/speech_recognition/README.md
+++ b/demos/speech_recognition/README.md
@@ -27,6 +27,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
   paddlespeech asr --input ./zh.wav
   # English
   paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav
+  # Chinese ASR + Punctuation Restoration
+  paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
   ```
   (It doesn't matter if package `paddlespeech-ctcdecoders` is not found, this package is optional.)
   
diff --git a/demos/speech_recognition/README_cn.md b/demos/speech_recognition/README_cn.md
index c2e38c91..ba1f1d65 100644
--- a/demos/speech_recognition/README_cn.md
+++ b/demos/speech_recognition/README_cn.md
@@ -25,6 +25,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
   paddlespeech asr --input ./zh.wav
   # 英文
   paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav
+  # 中文 + 标点恢复
+  paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
   ```
   (如果显示 `paddlespeech-ctcdecoders` 这个 python 包没有找到的 Error，没有关系，这个包是非必须的。)
   
diff --git a/demos/speech_recognition/run.sh b/demos/speech_recognition/run.sh
index 5efc8b81..06466928 100755
--- a/demos/speech_recognition/run.sh
+++ b/demos/speech_recognition/run.sh
@@ -1,4 +1,10 @@
 #!/bin/bash
 
 wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
+
+# asr
 paddlespeech asr --input ./zh.wav
+
+
+# asr + punc
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
\ No newline at end of file
diff --git a/demos/text_to_speech/README.md b/demos/text_to_speech/README.md
index 9d3c4ac5..2df72a82 100644
--- a/demos/text_to_speech/README.md
+++ b/demos/text_to_speech/README.md
@@ -17,11 +17,14 @@ The input of this demo should be a text of the specific language that can be pas
 ### 3. Usage
 - Command Line (Recommended)
     - Chinese
-    
         The default acoustic model is `Fastspeech2`, and the default vocoder is `Parallel WaveGAN`.
         ```bash
         paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！"
         ```
+    - Batch Process
+        ```bash
+        echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+        ```
     - Chinese, use `SpeedySpeech` as the acoustic model
         ```bash
         paddlespeech tts --am speedyspeech_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！"
diff --git a/demos/text_to_speech/README_cn.md b/demos/text_to_speech/README_cn.md
index f075efda..7e02b962 100644
--- a/demos/text_to_speech/README_cn.md
+++ b/demos/text_to_speech/README_cn.md
@@ -24,6 +24,10 @@
         ```bash
         paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！"
         ```
+    - 批处理
+        ```bash
+        echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+        ```
     - 中文，使用 `SpeedySpeech` 作为声学模型
         ```bash
         paddlespeech tts --am speedyspeech_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！"
diff --git a/demos/text_to_speech/run.sh b/demos/text_to_speech/run.sh
index c2487aee..b1340241 100755
--- a/demos/text_to_speech/run.sh
+++ b/demos/text_to_speech/run.sh
@@ -1,3 +1,7 @@
 #!/bin/bash
 
+# Single process
 paddlespeech tts --input 今天的天气不错啊
+
+# Batch process
+echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
\ No newline at end of file

From 75098698d8eae48d1d0343cd683c7b315ea4a02d Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Mon, 28 Feb 2022 10:45:39 +0000
Subject: [PATCH 2/3] format,test=doc

---
 paddlespeech/s2t/io/sampler.py                |  2 +-
 paddlespeech/s2t/models/u2_st/u2_st.py        |  4 +--
 .../t2s/modules/transformer/repeat.py         |  2 +-
 .../unit/asr/deepspeech2_online_model_test.py | 36 ++++++++-----------
 4 files changed, 17 insertions(+), 27 deletions(-)

diff --git a/paddlespeech/s2t/io/sampler.py b/paddlespeech/s2t/io/sampler.py
index 89752bb9..ac55af12 100644
--- a/paddlespeech/s2t/io/sampler.py
+++ b/paddlespeech/s2t/io/sampler.py
@@ -51,7 +51,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False):
     """
     rng = np.random.RandomState(epoch)
     shift_len = rng.randint(0, batch_size - 1)
-    batch_indices = list(zip(*[iter(indices[shift_len:])] * batch_size))
+    batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size))
     rng.shuffle(batch_indices)
     batch_indices = [item for batch in batch_indices for item in batch]
     assert clipped is False
diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py
index f7b05714..999723e5 100644
--- a/paddlespeech/s2t/models/u2_st/u2_st.py
+++ b/paddlespeech/s2t/models/u2_st/u2_st.py
@@ -33,8 +33,6 @@ from paddlespeech.s2t.modules.decoder import TransformerDecoder
 from paddlespeech.s2t.modules.encoder import ConformerEncoder
 from paddlespeech.s2t.modules.encoder import TransformerEncoder
 from paddlespeech.s2t.modules.loss import LabelSmoothingLoss
-from paddlespeech.s2t.modules.mask import mask_finished_preds
-from paddlespeech.s2t.modules.mask import mask_finished_scores
 from paddlespeech.s2t.modules.mask import subsequent_mask
 from paddlespeech.s2t.utils import checkpoint
 from paddlespeech.s2t.utils import layer_tools
@@ -291,7 +289,7 @@ class U2STBaseModel(nn.Layer):
         device = speech.place
 
         # Let's assume B = batch_size and N = beam_size
-        # 1. Encoder and init hypothesis 
+        # 1. Encoder and init hypothesis
         encoder_out, encoder_mask = self._forward_encoder(
             speech, speech_lengths, decoding_chunk_size,
             num_decoding_left_chunks,
diff --git a/paddlespeech/t2s/modules/transformer/repeat.py b/paddlespeech/t2s/modules/transformer/repeat.py
index 2073a78b..1e946adf 100644
--- a/paddlespeech/t2s/modules/transformer/repeat.py
+++ b/paddlespeech/t2s/modules/transformer/repeat.py
@@ -36,4 +36,4 @@ def repeat(N, fn):
     Returns:
         MultiSequential: Repeated model instance.
     """
-    return MultiSequential(*[fn(n) for n in range(N)])
+    return MultiSequential(* [fn(n) for n in range(N)])
diff --git a/tests/unit/asr/deepspeech2_online_model_test.py b/tests/unit/asr/deepspeech2_online_model_test.py
index d26e5b15..f23c4926 100644
--- a/tests/unit/asr/deepspeech2_online_model_test.py
+++ b/tests/unit/asr/deepspeech2_online_model_test.py
@@ -11,16 +11,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+import pickle
 import unittest
 
 import numpy as np
 import paddle
-import pickle
-import os
 from paddle import inference
 
-from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline
 from paddlespeech.s2t.models.ds2_online import DeepSpeech2InferModelOnline
+from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline
+
 
 class TestDeepSpeech2ModelOnline(unittest.TestCase):
     def setUp(self):
@@ -185,15 +186,12 @@ class TestDeepSpeech2ModelOnline(unittest.TestCase):
                 paddle.allclose(final_state_c_box, final_state_c_box_chk), True)
 
 
-
-
 class TestDeepSpeech2StaticModelOnline(unittest.TestCase):
-    
     def setUp(self):
         export_prefix = "exp/deepspeech2_online/checkpoints/test_export"
         if not os.path.exists(os.path.dirname(export_prefix)):
             os.makedirs(os.path.dirname(export_prefix), mode=0o755)
-        infer_model =  DeepSpeech2InferModelOnline(
+        infer_model = DeepSpeech2InferModelOnline(
             feat_size=161,
             dict_size=4233,
             num_conv_layers=2,
@@ -207,27 +205,25 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase):
 
         with open("test_data/static_ds2online_inputs.pickle", "rb") as f:
             self.data_dict = pickle.load(f)
-        
+
         self.setup_model(export_prefix)
-    
 
     def setup_model(self, export_prefix):
-        deepspeech_config = inference.Config(
-            export_prefix + ".pdmodel",
-            export_prefix + ".pdiparams")
-        if ('CUDA_VISIBLE_DEVICES' in os.environ.keys() and os.environ['CUDA_VISIBLE_DEVICES'].strip() != ''):
+        deepspeech_config = inference.Config(export_prefix + ".pdmodel",
+                                             export_prefix + ".pdiparams")
+        if ('CUDA_VISIBLE_DEVICES' in os.environ.keys() and
+                os.environ['CUDA_VISIBLE_DEVICES'].strip() != ''):
             deepspeech_config.enable_use_gpu(100, 0)
             deepspeech_config.enable_memory_optim()
         deepspeech_predictor = inference.create_predictor(deepspeech_config)
         self.predictor = deepspeech_predictor
-    
+
     def test_unit(self):
         input_names = self.predictor.get_input_names()
         audio_handle = self.predictor.get_input_handle(input_names[0])
         audio_len_handle = self.predictor.get_input_handle(input_names[1])
         h_box_handle = self.predictor.get_input_handle(input_names[2])
         c_box_handle = self.predictor.get_input_handle(input_names[3])
-        
 
         x_chunk = self.data_dict["audio_chunk"]
         x_chunk_lens = self.data_dict["audio_chunk_lens"]
@@ -246,13 +242,9 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase):
         c_box_handle.reshape(chunk_state_c_box.shape)
         c_box_handle.copy_from_cpu(chunk_state_c_box)
 
-
-
         output_names = self.predictor.get_output_names()
-        output_handle = self.predictor.get_output_handle(
-            output_names[0])
-        output_lens_handle = self.predictor.get_output_handle(
-            output_names[1])
+        output_handle = self.predictor.get_output_handle(output_names[0])
+        output_lens_handle = self.predictor.get_output_handle(output_names[1])
         output_state_h_handle = self.predictor.get_output_handle(
             output_names[2])
         output_state_c_handle = self.predictor.get_output_handle(
@@ -264,7 +256,7 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase):
         chunk_state_h_box = output_state_h_handle.copy_to_cpu()
         chunk_state_c_box = output_state_c_handle.copy_to_cpu()
         return True
-    
+
 
 if __name__ == '__main__':
     unittest.main()

From 335638ba1877a72d94b39f964e999acd6e18f26a Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Mon, 28 Feb 2022 11:01:50 +0000
Subject: [PATCH 3/3] update gitignore, test=doc

---
 .gitignore | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.gitignore b/.gitignore
index cc8fff87..778824f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 *.pyc
 .vscode
 *log
+*.wav
 *.pdmodel
 *.pdiparams*
 *.zip
@@ -30,5 +31,8 @@ tools/OpenBLAS/
 tools/Miniconda3-latest-Linux-x86_64.sh
 tools/activate_python.sh
 tools/miniconda.sh
+tools/CRF++-0.58/
+
+speechx/fc_patch/
 
 *output/