Merge pull request #1507 from zh794390558/cli

[cli] add cli batch/pipe example to readme
Hui Zhang committed 3 years ago via GitHub
commit e8f2d8f11b

.gitignore (vendored): 4 changed lines

@@ -2,6 +2,7 @@
*.pyc
.vscode
*log
+*.wav
*.pdmodel
*.pdiparams*
*.zip
@@ -30,5 +31,8 @@ tools/OpenBLAS/
tools/Miniconda3-latest-Linux-x86_64.sh
tools/activate_python.sh
tools/miniconda.sh
+tools/CRF++-0.58/
+speechx/fc_patch/
*output/

@@ -196,16 +196,18 @@ Developers can have a try of our models with [PaddleSpeech Command Line](./paddl
```shell
paddlespeech cls --input input.wav
```
**Automatic Speech Recognition**
```shell
paddlespeech asr --lang zh --input input_16k.wav
```
**Speech Translation** (English to Chinese)
(not supported on Mac and Windows yet)
```shell
paddlespeech st --input input_16k.wav
```
**Text-to-Speech**
```shell
paddlespeech tts --input "你好,欢迎使用飞桨深度学习框架!" --output output.wav
@@ -218,7 +220,16 @@ paddlespeech tts --input "你好,欢迎使用飞桨深度学习框架!" --ou
paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
```
+**Batch Process**
+```
+echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+```
+**Shell Pipeline**
+ASR + Punc:
+```
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
+```
For more command-line usage, please see: [demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos)
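In the batch example, each stdin line appears to carry an utterance id followed by the sentence to synthesize; the pipeline works because `paddlespeech asr` prints its transcript on stdout, which `paddlespeech text` then reads from stdin. A minimal Python sketch of the same pipeline, using only the CLI flags shown above (the `subprocess` wiring is an illustrative assumption, not part of this PR):

```python
# Sketch of `paddlespeech asr --input ./zh.wav | paddlespeech text --task punc`
# driven from Python. Assumes the paddlespeech CLI is installed and on PATH
# and that ./zh.wav exists; only flags from the examples above are used.
import subprocess

# ASR prints the recognized transcript on stdout.
asr = subprocess.run(
    ["paddlespeech", "asr", "--input", "./zh.wav"],
    capture_output=True, text=True, check=True)

# The punctuation task reads the text to restore from stdin.
punc = subprocess.run(
    ["paddlespeech", "text", "--task", "punc"],
    input=asr.stdout, capture_output=True, text=True, check=True)
print(punc.stdout.strip())
```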

@@ -216,6 +216,17 @@ paddlespeech tts --input "你好,欢迎使用百度飞桨深度学习框架!
paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
```
+**Batch Process**
+```
+echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+```
+**Shell Pipeline**
+ASR + Punc:
+```
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
+```
For more command-line usage, please see [demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos)
> Note: for model training or fine-tuning, please see the [speech recognition](./docs/source/asr/quick_start.md) and [speech synthesis](./docs/source/tts/quick_start.md) quick-start guides.

@@ -27,6 +27,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
paddlespeech asr --input ./zh.wav
# English
paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav
+# Chinese ASR + Punctuation Restoration
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
```
(It doesn't matter if the package `paddlespeech-ctcdecoders` is not found; it is optional.)

@@ -25,6 +25,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
paddlespeech asr --input ./zh.wav
# English
paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav
+# Chinese + punctuation restoration
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
```
(If an error says the Python package `paddlespeech-ctcdecoders` cannot be found, don't worry; this package is optional.)

@@ -1,4 +1,10 @@
#!/bin/bash
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
+# asr
paddlespeech asr --input ./zh.wav
+# asr + punc
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc

@@ -17,11 +17,14 @@ The input of this demo should be a text of the specific language that can be pas
### 3. Usage
- Command Line (Recommended)
- Chinese
  The default acoustic model is `Fastspeech2`, and the default vocoder is `Parallel WaveGAN`.
  ```bash
  paddlespeech tts --input "你好,欢迎使用百度飞桨深度学习框架!"
  ```
+- Batch Process
+  ```bash
+  echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+  ```
- Chinese, use `SpeedySpeech` as the acoustic model
  ```bash
  paddlespeech tts --am speedyspeech_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!"

@@ -24,6 +24,10 @@
  ```bash
  paddlespeech tts --input "你好,欢迎使用百度飞桨深度学习框架!"
  ```
+- Batch Process
+  ```bash
+  echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+  ```
- Chinese, using `SpeedySpeech` as the acoustic model
  ```bash
  paddlespeech tts --am speedyspeech_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!"

@@ -1,3 +1,7 @@
#!/bin/bash
+# single process
paddlespeech tts --input 今天的天气不错啊
+# batch process
+echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts

@@ -51,7 +51,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False):
    """
    rng = np.random.RandomState(epoch)
    shift_len = rng.randint(0, batch_size - 1)
-    batch_indices = list(zip(*[iter(indices[shift_len:])] * batch_size))
+    batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size))
    rng.shuffle(batch_indices)
    batch_indices = [item for batch in batch_indices for item in batch]
    assert clipped is False
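The `zip(*[iter(...)] * batch_size)` expression being reformatted here is the standard grouper idiom: the same iterator object is repeated `batch_size` times, so `zip` draws `batch_size` consecutive items for each output tuple and silently drops any trailing remainder. A self-contained illustration (the values are made up):

```python
# Grouper idiom as used in _batch_shuffle: the *same* iterator is repeated
# n times, so zip pulls n consecutive items for each output tuple.
indices = list(range(10))
batch_size = 3
batches = list(zip(*[iter(indices)] * batch_size))
print(batches)  # [(0, 1, 2), (3, 4, 5), (6, 7, 8)]  (index 9 is dropped)
```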

@@ -33,8 +33,6 @@ from paddlespeech.s2t.modules.decoder import TransformerDecoder
from paddlespeech.s2t.modules.encoder import ConformerEncoder
from paddlespeech.s2t.modules.encoder import TransformerEncoder
from paddlespeech.s2t.modules.loss import LabelSmoothingLoss
-from paddlespeech.s2t.modules.mask import mask_finished_preds
-from paddlespeech.s2t.modules.mask import mask_finished_scores
from paddlespeech.s2t.modules.mask import subsequent_mask
from paddlespeech.s2t.utils import checkpoint
from paddlespeech.s2t.utils import layer_tools
@@ -291,7 +289,7 @@ class U2STBaseModel(nn.Layer):
        device = speech.place
        # Let's assume B = batch_size and N = beam_size
        # 1. Encoder and init hypothesis
        encoder_out, encoder_mask = self._forward_encoder(
            speech, speech_lengths, decoding_chunk_size,
            num_decoding_left_chunks,

@@ -36,4 +36,4 @@ def repeat(N, fn):
    Returns:
        MultiSequential: Repeated model instance.
    """
-    return MultiSequential(*[fn(n) for n in range(N)])
+    return MultiSequential(* [fn(n) for n in range(N)])
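For readers unfamiliar with the pattern: `repeat` calls the factory `fn` once per index and unpacks the resulting blocks into the container's constructor. A minimal sketch, with `paddle.nn.Sequential` standing in for `MultiSequential` and an invented block factory:

```python
# Sketch of the repeat(N, fn) pattern. paddle.nn.Sequential stands in for
# MultiSequential here; make_block is a hypothetical per-layer factory
# (real code would build encoder/decoder blocks).
import paddle.nn as nn

def make_block(n):
    return nn.Linear(256, 256)  # illustrative layer sizes

stack = nn.Sequential(*[make_block(n) for n in range(6)])  # 6 stacked blocks
```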

@@ -11,16 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+import os
+import pickle
import unittest

import numpy as np
import paddle
-import pickle
-import os
from paddle import inference

-from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline
from paddlespeech.s2t.models.ds2_online import DeepSpeech2InferModelOnline
+from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline


class TestDeepSpeech2ModelOnline(unittest.TestCase):
    def setUp(self):
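(The import reshuffle above appears to follow the usual isort grouping: standard-library modules first, then third-party packages, then first-party `paddlespeech` imports, with each group alphabetized.)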
@@ -185,15 +186,12 @@ class TestDeepSpeech2ModelOnline(unittest.TestCase):
            paddle.allclose(final_state_c_box, final_state_c_box_chk), True)


class TestDeepSpeech2StaticModelOnline(unittest.TestCase):
    def setUp(self):
        export_prefix = "exp/deepspeech2_online/checkpoints/test_export"
        if not os.path.exists(os.path.dirname(export_prefix)):
            os.makedirs(os.path.dirname(export_prefix), mode=0o755)
        infer_model = DeepSpeech2InferModelOnline(
            feat_size=161,
            dict_size=4233,
            num_conv_layers=2,
@@ -207,27 +205,25 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase):
        with open("test_data/static_ds2online_inputs.pickle", "rb") as f:
            self.data_dict = pickle.load(f)
        self.setup_model(export_prefix)

    def setup_model(self, export_prefix):
-        deepspeech_config = inference.Config(
-            export_prefix + ".pdmodel",
-            export_prefix + ".pdiparams")
-        if ('CUDA_VISIBLE_DEVICES' in os.environ.keys() and os.environ['CUDA_VISIBLE_DEVICES'].strip() != ''):
+        deepspeech_config = inference.Config(export_prefix + ".pdmodel",
+                                             export_prefix + ".pdiparams")
+        if ('CUDA_VISIBLE_DEVICES' in os.environ.keys() and
+                os.environ['CUDA_VISIBLE_DEVICES'].strip() != ''):
            deepspeech_config.enable_use_gpu(100, 0)
            deepspeech_config.enable_memory_optim()
        deepspeech_predictor = inference.create_predictor(deepspeech_config)
        self.predictor = deepspeech_predictor

    def test_unit(self):
        input_names = self.predictor.get_input_names()
        audio_handle = self.predictor.get_input_handle(input_names[0])
        audio_len_handle = self.predictor.get_input_handle(input_names[1])
        h_box_handle = self.predictor.get_input_handle(input_names[2])
        c_box_handle = self.predictor.get_input_handle(input_names[3])

        x_chunk = self.data_dict["audio_chunk"]
        x_chunk_lens = self.data_dict["audio_chunk_lens"]
@@ -246,13 +242,9 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase):
        c_box_handle.reshape(chunk_state_c_box.shape)
        c_box_handle.copy_from_cpu(chunk_state_c_box)

        output_names = self.predictor.get_output_names()
-        output_handle = self.predictor.get_output_handle(
-            output_names[0])
-        output_lens_handle = self.predictor.get_output_handle(
-            output_names[1])
+        output_handle = self.predictor.get_output_handle(output_names[0])
+        output_lens_handle = self.predictor.get_output_handle(output_names[1])
        output_state_h_handle = self.predictor.get_output_handle(
            output_names[2])
        output_state_c_handle = self.predictor.get_output_handle(
@@ -264,7 +256,7 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase):
        chunk_state_h_box = output_state_h_handle.copy_to_cpu()
        chunk_state_c_box = output_state_c_handle.copy_to_cpu()
        return True


if __name__ == '__main__':
    unittest.main()
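Pulling the fragments of this test together, the Paddle inference cycle it exercises is: build an `inference.Config` from the exported `.pdmodel`/`.pdiparams` pair, create a predictor, copy inputs into named handles, run, and copy outputs back to CPU. A condensed sketch with placeholder paths and input shape (`predictor.run()` is the standard Paddle inference call, though it does not appear in the hunks above):

```python
# Condensed Paddle inference cycle, mirroring the test above.
# "model.pdmodel"/"model.pdiparams" and the input array are placeholders.
import numpy as np
from paddle import inference

config = inference.Config("model.pdmodel", "model.pdiparams")
predictor = inference.create_predictor(config)

x = np.zeros((1, 16, 161), dtype=np.float32)  # placeholder audio chunk
in_handle = predictor.get_input_handle(predictor.get_input_names()[0])
in_handle.reshape(x.shape)
in_handle.copy_from_cpu(x)

predictor.run()

out_handle = predictor.get_output_handle(predictor.get_output_names()[0])
result = out_handle.copy_to_cpu()
```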
