update the online protocal note, test=doc

4 years ago · 13a37b4892
parent 2f2cb7eaaf
commit 13a37b4892
1 changed files with 28 additions and 20 deletions
--- a/paddlespeech/server/ws/asr_socket.py
+++ b/paddlespeech/server/ws/asr_socket.py
@ -20,50 +20,52 @@ from starlette.websockets import WebSocketState as WebSocketState

 from paddlespeech.server.engine.asr.online.asr_engine import PaddleASRConnectionHanddler
 from paddlespeech.server.engine.engine_pool import get_engine_pool
-from paddlespeech.server.utils.buffer import ChunkBuffer
-from paddlespeech.server.utils.vad import VADAudio

 router = APIRouter()


@router.websocket('/ws/asr')
 async def websocket_endpoint(websocket: WebSocket):
+    """PaddleSpeech Online ASR Server api
+
+    Args:
+        websocket (WebSocket): the websocket instance
+    """
+
+    #1. the interface wait to accept the websocket protocal header
+    #   and only we receive the header, it establish the connection with specific thread
    await websocket.accept()

+    #2. if we accept the websocket headers, we will get the online asr engine instance
    engine_pool = get_engine_pool()
    asr_engine = engine_pool['asr']
-    connection_handler = None
-    # init buffer
-    # each websocekt connection has its own chunk buffer
-    chunk_buffer_conf = asr_engine.config.chunk_buffer_conf
-    chunk_buffer = ChunkBuffer(
-        window_n=chunk_buffer_conf.window_n,
-        shift_n=chunk_buffer_conf.shift_n,
-        window_ms=chunk_buffer_conf.window_ms,
-        shift_ms=chunk_buffer_conf.shift_ms,
-        sample_rate=chunk_buffer_conf.sample_rate,
-        sample_width=chunk_buffer_conf.sample_width)

-    # init vad
-    vad_conf = asr_engine.config.get('vad_conf', None)
-    if vad_conf:
-        vad = VADAudio(
-            aggressiveness=vad_conf['aggressiveness'],
-            rate=vad_conf['sample_rate'],
-            frame_duration_ms=vad_conf['frame_duration_ms'])
+    #3. each websocket connection, we will create an PaddleASRConnectionHanddler to process such audio
+    #   and each connection has its own connection instance to process the request
+    #   and only if client send the start signal, we create the PaddleASRConnectionHanddler instance
+    connection_handler = None

    try:
+        #4. we do a loop to process the audio package by package according the protocal
+        #   and only if the client send finished signal, we will break the loop
        while True:
            # careful here, changed the source code from starlette.websockets
+            # 4.1 we wait for the client signal for the specific action
            assert websocket.application_state == WebSocketState.CONNECTED
            message = await websocket.receive()
            websocket._raise_on_disconnect(message)
+
+            #4.2 text for the action command and bytes for pcm data
            if "text" in message:
+                # we first parse the specific command
                message = json.loads(message["text"])
                if 'signal' not in message:
                    resp = {"status": "ok", "message": "no valid json data"}
                    await websocket.send_json(resp)

+                # start command, we create the PaddleASRConnectionHanddler instance to process the audio data
+                # end command, we process the all the last audio pcm and return the final result
+                #              and we break the loop
                if message['signal'] == 'start':
                    resp = {"status": "ok", "signal": "server_ready"}
                    # do something at begining here
@ -72,6 +74,7 @@ async def websocket_endpoint(websocket: WebSocket):
                    await websocket.send_json(resp)
                elif message['signal'] == 'end':
                    # reset single  engine for an new connection
+                    # and we will destroy the connection
                    connection_handler.decode(is_finished=True)
                    connection_handler.rescoring()
                    asr_results = connection_handler.get_result()
@ -88,12 +91,17 @@ async def websocket_endpoint(websocket: WebSocket):
                    resp = {"status": "ok", "message": "no valid json data"}
                    await websocket.send_json(resp)
            elif "bytes" in message:
+                # bytes for the pcm data
                message = message["bytes"]

+                # we extract the remained audio pcm 
+                # and decode for the result in this package data
                connection_handler.extract_feat(message)
                connection_handler.decode(is_finished=False)
                asr_results = connection_handler.get_result()

+                # return the current period result
+                # if the engine create the vad instance, this connection will have many period results 
                resp = {'asr_results': asr_results}
                await websocket.send_json(resp)
    except WebSocketDisconnect: