fixed comments, test=doc

3 years ago · 2ec8d608bf
parent d847fe29cf
commit 2ec8d608bf
6 changed files with 142 additions and 94 deletions
--- a/paddlespeech/server/conf/application.yaml
+++ b/paddlespeech/server/conf/application.yaml
@ -3,18 +3,15 @@
 #################################################################################
 #                             SERVER SETTING                                    #
 #################################################################################
-host: 0.0.0.0
+host: 127.0.0.1
 port: 8090
 # The task format in the engine_list is: <speech task>_<engine type>
 # task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference']
-# protocol: 'http'
+# protocol = ['websocket', 'http'] (only one can be selected). 
-# engine_list: ['asr_python', 'tts_python', 'cls_python']
+# http only supports the offline engine type.
-
+protocol: 'http'
-
+engine_list: ['asr_python', 'tts_python', 'cls_python']
 # websocket, http (choose only one). websocket only supports the online engine type.
 protocol: 'websocket'
 engine_list: ['asr_online']
 #################################################################################
--- a/paddlespeech/server/conf/ws_application.yaml
+++ b/paddlespeech/server/conf/ws_application.yaml
@ -0,0 +1,51 @@
 # This is the parameter configuration file for PaddleSpeech Serving.
 #################################################################################
 #                             SERVER SETTING                                    #
 #################################################################################
 host: 0.0.0.0
 port: 8091
 # The task format in the engine_list is: <speech task>_<engine type>
 # task choices = ['asr_online', 'tts_online']
 # protocol = ['websocket', 'http'] (only one can be selected).
 # websocket only supports the online engine type.
 protocol: 'websocket'
 engine_list: ['asr_online']
 #################################################################################
 #                                ENGINE CONFIG                                  #
 #################################################################################
 ################################### ASR #########################################
 ################### speech task: asr; engine_type: online #######################
 asr_online:
    model_type: 'deepspeech2online_aishell'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path: 
    decode_method: 
    force_yes: True
    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config
    chunk_buffer_conf:
        frame_duration_ms: 80
        shift_ms: 40
        sample_rate: 16000
        sample_width: 2
    vad_conf:
        aggressiveness: 2
        sample_rate: 16000
        frame_duration_ms: 20
        sample_width: 2
        padding_ms: 200
        padding_ratio: 0.9
--- a/paddlespeech/server/tests/asr/online/microphone_client.py
+++ b/paddlespeech/server/tests/asr/online/microphone_client.py
@ -11,25 +11,23 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 record wave from the mic
 """
-
+import asyncio
 import json
 import logging
 import threading
 import pyaudio
 import wave
-import logging
+from signal import SIGINT
-import asyncio
+from signal import SIGTERM
 import pyaudio
 import websockets
 import json
 from signal import SIGINT, SIGTERM
 class ASRAudioHandler(threading.Thread):
-    def __init__(self,
+    def __init__(self, url="127.0.0.1", port=8091):
                 url="127.0.0.1",
                 port=8090):
        threading.Thread.__init__(self)
        self.url = url
        self.port = port
@ -56,12 +54,13 @@ class ASRAudioHandler(threading.Thread):
        self._running = True
        self._frames = []
        p = pyaudio.PyAudio()
-        stream = p.open(format=self.format,
+        stream = p.open(
            format=self.format,
            channels=self.channels,
            rate=self.rate,
            input=True,
            frames_per_buffer=self.chunk)
-        while(self._running):
+        while (self._running):
            data = stream.read(self.chunk)
            self._frames.append(data)
            self.data_backup.append(data)
@ -97,11 +96,15 @@ class ASRAudioHandler(threading.Thread):
            async with websockets.connect(self.url) as ws:
                # send the start command
-                audio_info = json.dumps({
+                audio_info = json.dumps(
                    {
                        "name": "test.wav",
                        "signal": "start",
                        "nbest": 5
-                                }, sort_keys=True, indent=4, separators=(',', ': '))
+                    },
                    sort_keys=True,
                    indent=4,
                    separators=(',', ': '))
                await ws.send(audio_info)
                msg = await ws.recv()
                logging.info("receive msg={}".format(msg))
@ -117,11 +120,15 @@ class ASRAudioHandler(threading.Thread):
                except asyncio.CancelledError:
                    # quit
                    # send finished 
-                    audio_info = json.dumps({
+                    audio_info = json.dumps(
                        {
                            "name": "test.wav",
                            "signal": "end",
                            "nbest": 5
-                                    }, sort_keys=True, indent=4, separators=(',', ': '))
+                        },
                        sort_keys=True,
                        indent=4,
                        separators=(',', ': '))
                    await ws.send(audio_info)
                    msg = await ws.recv()
                    logging.info("receive msg={}".format(msg))
@ -141,7 +148,7 @@ if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    logging.info("asr websocket client start")
-    handler = ASRAudioHandler("127.0.0.1", 8090)
+    handler = ASRAudioHandler("127.0.0.1", 8091)
    loop = asyncio.get_event_loop()
    main_task = asyncio.ensure_future(handler.run())
    for signal in [SIGINT, SIGTERM]:
--- a/paddlespeech/server/tests/asr/online/websocket_client.py
+++ b/paddlespeech/server/tests/asr/online/websocket_client.py
@ -11,26 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #!/usr/bin/python
 # -*- coding: UTF-8 -*-
 import argparse
-import logging
+import asyncio
 import time
 import os
 import json
-import wave
+import logging
 import numpy as np
 import asyncio
 import websockets
 import soundfile
 import websockets
 class ASRAudioHandler:
-    def __init__(self,
+    def __init__(self, url="127.0.0.1", port=8090):
                 url="127.0.0.1",
                 port=8090):
        self.url = url
        self.port = port
        self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr"
@ -42,13 +36,11 @@ class ASRAudioHandler:
        chunk_size = 80 * 16  #80ms, sample_rate = 16kHz
        if (x_len - chunk_size) % chunk_stride != 0:
-            padding_len_x = chunk_stride - (x_len - chunk_size
+            padding_len_x = chunk_stride - (x_len - chunk_size) % chunk_stride
                                            ) % chunk_stride
        else:
            padding_len_x = 0
-        padding = np.zeros(
+        padding = np.zeros((padding_len_x), dtype=samples.dtype)
            (padding_len_x), dtype=samples.dtype)
        padded_x = np.concatenate([samples, padding], axis=0)
        num_chunk = (x_len + padding_len_x - chunk_size) / chunk_stride + 1
@ -68,11 +60,15 @@ class ASRAudioHandler:
        async with websockets.connect(self.url) as ws:
            # the server has already received the handshake header
            # send the start command
-            audio_info = json.dumps({
+            audio_info = json.dumps(
                {
                    "name": "test.wav",
                    "signal": "start",
                    "nbest": 5
-                            }, sort_keys=True, indent=4, separators=(',', ': '))
+                },
                sort_keys=True,
                indent=4,
                separators=(',', ': '))
            await ws.send(audio_info)
            msg = await ws.recv()
            logging.info("receive msg={}".format(msg))
@ -84,11 +80,15 @@ class ASRAudioHandler:
                logging.info("receive msg={}".format(msg))
            # finished 
-            audio_info = json.dumps({
+            audio_info = json.dumps(
                {
                    "name": "test.wav",
                    "signal": "end",
                    "nbest": 5
-                            }, sort_keys=True, indent=4, separators=(',', ': '))
+                },
                sort_keys=True,
                indent=4,
                separators=(',', ': '))
            await ws.send(audio_info)
            msg = await ws.recv()
            logging.info("receive msg={}".format(msg))
@ -97,7 +97,7 @@ class ASRAudioHandler:
 def main(args):
    logging.basicConfig(level=logging.INFO)
    logging.info("asr websocket client start")
-    handler = ASRAudioHandler("127.0.0.1", 8090)
+    handler = ASRAudioHandler("127.0.0.1", 8091)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(handler.run(args.wavfile))
    logging.info("asr websocket client finished")
--- a/paddlespeech/server/utils/vad.py
+++ b/paddlespeech/server/utils/vad.py
@ -12,16 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import collections
 import logging
 import webrtcvad
 class VADAudio():
    def __init__(self,
-                 aggressiveness,
+                 aggressiveness=2,
-                 rate,
+                 rate=16000,
-                 frame_duration_ms,
+                 frame_duration_ms=20,
                 sample_width=2,
                 padding_ms=200,
                 padding_ratio=0.9):
--- a/paddlespeech/server/ws/asr_socket.py
+++ b/paddlespeech/server/ws/asr_socket.py
@ -11,35 +11,39 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import base64
 import traceback
 from typing import Union
 import random
 import numpy as np
 import json
 import numpy as np
 from fastapi import APIRouter
 from fastapi import WebSocket
 from fastapi import WebSocketDisconnect
 from starlette.websockets import WebSocketState as WebSocketState
 from paddlespeech.server.engine.asr.online.asr_engine import ASREngine
 from paddlespeech.server.engine.engine_pool import get_engine_pool
 from paddlespeech.server.utils.buffer import ChunkBuffer
 from paddlespeech.server.utils.vad import VADAudio
 router = APIRouter()
@router.websocket('/ws/asr')
 async def websocket_endpoint(websocket: WebSocket):
    await websocket.accept()
    engine_pool = get_engine_pool()
    asr_engine = engine_pool['asr']
    # init buffer
-    chunk_buffer = ChunkBuffer(sample_width=2)
+    chunk_buffer_conf = asr_engine.config.chunk_buffer_conf
    chunk_buffer = ChunkBuffer(
        sample_rate=chunk_buffer_conf['sample_rate'],
        sample_width=chunk_buffer_conf['sample_width'])
    # init vad
-    vad = VADAudio(2, 16000, 20)
+    vad_conf = asr_engine.config.vad_conf
    vad = VADAudio(
        aggressiveness=vad_conf['aggressiveness'],
        rate=vad_conf['sample_rate'],
        frame_duration_ms=vad_conf['frame_duration_ms'])
    try:
        while True:
@ -50,17 +54,11 @@ async def websocket_endpoint(websocket: WebSocket):
            if "text" in message:
                message = json.loads(message["text"])
                if 'signal' not in message:
-                    resp = {
+                    resp = {"status": "ok", "message": "no valid json data"}
                            "status": "ok",
                            "message": "no valid json data"
                            }
                    await websocket.send_json(resp)
                if message['signal'] == 'start':
-                    resp = {
+                    resp = {"status": "ok", "signal": "server_ready"}
                            "status": "ok",
                            "signal": "server_ready"
                            }
                    # do something at the beginning here
                    await websocket.send_json(resp)
                elif message['signal'] == 'end':
@ -68,24 +66,19 @@ async def websocket_endpoint(websocket: WebSocket):
                    asr_engine = engine_pool['asr']
                    # reset the single engine for a new connection
                    asr_engine.reset()
-                    resp = {
+                    resp = {"status": "ok", "signal": "finished"}
                            "status": "ok",
                            "signal": "finished"
                            }
                    await websocket.send_json(resp)
                    break
                else:
-                    resp = {
+                    resp = {"status": "ok", "message": "no valid json data"}
                            "status": "ok",
                            "message": "no valid json data"
                            }
                    await websocket.send_json(resp)
            elif "bytes" in message:
                message = message["bytes"]
                # vad for input bytes audio
                vad.add_audio(message)
-                message = b''.join(f for f in vad.vad_collector() if f is not None)
+                message = b''.join(f for f in vad.vad_collector()
                                   if f is not None)
                engine_pool = get_engine_pool()
                asr_engine = engine_pool['asr']
@ -94,7 +87,8 @@ async def websocket_endpoint(websocket: WebSocket):
                for frame in frames:
                    samples = np.frombuffer(frame.bytes, dtype=np.int16)
                    sample_rate = asr_engine.config.sample_rate
-                    x_chunk, x_chunk_lens = asr_engine.preprocess(samples, sample_rate)
+                    x_chunk, x_chunk_lens = asr_engine.preprocess(samples,
                                                                  sample_rate)
                    asr_engine.run(x_chunk, x_chunk_lens)
                    asr_results = asr_engine.postprocess()