You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
101 lines
4.0 KiB
101 lines
4.0 KiB
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import json
|
|
|
|
import numpy as np
|
|
from fastapi import APIRouter
|
|
from fastapi import WebSocket
|
|
from fastapi import WebSocketDisconnect
|
|
from starlette.websockets import WebSocketState as WebSocketState
|
|
|
|
from paddlespeech.server.engine.engine_pool import get_engine_pool
|
|
from paddlespeech.server.utils.buffer import ChunkBuffer
|
|
from paddlespeech.server.utils.vad import VADAudio
|
|
|
|
# Router on which this module's websocket route(s) are registered.
router = APIRouter()
|
|
|
|
|
|
@router.websocket('/ws/asr')
async def websocket_endpoint(websocket: WebSocket):
    """Serve one streaming ASR session on the ``/ws/asr`` websocket.

    Protocol, as implemented below:

    * Text frames are expected to hold a JSON object with a ``signal`` key.
      ``"start"`` is answered with ``{"signal": "server_ready"}``.
      ``"end"`` resets the ASR engine, answers ``{"signal": "finished"}``
      and ends the session. Anything else (unknown signal, missing key,
      non-object payload or malformed JSON) is answered with
      ``{"message": "no valid json data"}`` and the session continues.
    * Binary frames are run through VAD, re-chunked by ``ChunkBuffer``,
      read as int16 samples and fed to the ASR engine; the value of
      ``asr_engine.postprocess()`` is sent back as ``{"asr_results": ...}``.

    A ``WebSocketDisconnect`` ends the loop silently.

    Args:
        websocket: The incoming websocket connection.
    """
    await websocket.accept()

    engine_pool = get_engine_pool()
    asr_engine = engine_pool['asr']
    # init buffer
    chunk_buffer_conf = asr_engine.config.chunk_buffer_conf
    chunk_buffer = ChunkBuffer(
        sample_rate=chunk_buffer_conf['sample_rate'],
        sample_width=chunk_buffer_conf['sample_width'])
    # init vad
    vad_conf = asr_engine.config.vad_conf
    vad = VADAudio(
        aggressiveness=vad_conf['aggressiveness'],
        rate=vad_conf['sample_rate'],
        frame_duration_ms=vad_conf['frame_duration_ms'])

    try:
        while True:
            # careful here, changed the source code from starlette.websockets
            assert websocket.application_state == WebSocketState.CONNECTED
            message = await websocket.receive()
            websocket._raise_on_disconnect(message)

            # An ASGI server may include both the "text" and "bytes" keys
            # with one of them set to None, so test the value rather than
            # mere key presence.
            text = message.get("text")
            data = message.get("bytes")

            if text is not None:
                try:
                    payload = json.loads(text)
                except json.JSONDecodeError:
                    payload = None
                # Only a JSON object can carry a signal; everything else is
                # reported to the client as invalid instead of raising.
                signal = payload.get('signal') if isinstance(
                    payload, dict) else None

                if signal == 'start':
                    resp = {"status": "ok", "signal": "server_ready"}
                    # do something at beginning here
                    await websocket.send_json(resp)
                elif signal == 'end':
                    engine_pool = get_engine_pool()
                    asr_engine = engine_pool['asr']
                    # reset single engine for a new connection
                    asr_engine.reset()
                    resp = {"status": "ok", "signal": "finished"}
                    await websocket.send_json(resp)
                    break
                else:
                    resp = {"status": "ok", "message": "no valid json data"}
                    await websocket.send_json(resp)
            elif data is not None:
                # vad for input bytes audio
                vad.add_audio(data)
                voiced = b''.join(f for f in vad.vad_collector()
                                  if f is not None)

                engine_pool = get_engine_pool()
                asr_engine = engine_pool['asr']
                sample_rate = asr_engine.config.sample_rate
                for frame in chunk_buffer.frame_generator(voiced):
                    samples = np.frombuffer(frame.bytes, dtype=np.int16)
                    x_chunk, x_chunk_lens = asr_engine.preprocess(samples,
                                                                  sample_rate)
                    asr_engine.run(x_chunk, x_chunk_lens)
                    # NOTE(review): result is overwritten below; call kept in
                    # case postprocess() has per-chunk side effects - confirm.
                    asr_engine.postprocess()

                asr_results = asr_engine.postprocess()
                resp = {'asr_results': asr_results}

                await websocket.send_json(resp)
    except WebSocketDisconnect:
        # Client went away; nothing more to send.
        pass
|