fixed comments, test=doc

pull/1627/head
WilliamZhang06 2 years ago
parent d847fe29cf
commit 2ec8d608bf

@ -3,18 +3,15 @@
################################################################################# #################################################################################
# SERVER SETTING # # SERVER SETTING #
################################################################################# #################################################################################
host: 0.0.0.0 host: 127.0.0.1
port: 8090 port: 8090
# The task format in the engine_list is: <speech task>_<engine type> # The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference'] # task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference']
# protocol: 'http' # protocol = ['websocket', 'http'] (only one can be selected).
# engine_list: ['asr_python', 'tts_python', 'cls_python'] # http only supports the offline engine type.
protocol: 'http'
engine_list: ['asr_python', 'tts_python', 'cls_python']
# websocket, http (only choose one). websocket only supports the online engine type.
protocol: 'websocket'
engine_list: ['asr_online']
################################################################################# #################################################################################

@ -0,0 +1,51 @@
# This is the parameter configuration file for PaddleSpeech Serving.
#################################################################################
# SERVER SETTING #
#################################################################################
host: 0.0.0.0
port: 8091
# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online', 'tts_online']
# protocol = ['websocket', 'http'] (only one can be selected).
# websocket only supports the online engine type.
protocol: 'websocket'
engine_list: ['asr_online']
#################################################################################
# ENGINE CONFIG #
#################################################################################
################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
model_type: 'deepspeech2online_aishell'
am_model: # the pdmodel file of am static model [optional]
am_params: # the pdiparams file of am static model [optional]
lang: 'zh'
sample_rate: 16000
cfg_path:
decode_method:
force_yes: True
am_predictor_conf:
device: # set 'gpu:id' or 'cpu'
switch_ir_optim: True
glog_info: False # True -> print glog
summary: True # False -> do not show predictor config
chunk_buffer_conf:
frame_duration_ms: 80
shift_ms: 40
sample_rate: 16000
sample_width: 2
vad_conf:
aggressiveness: 2
sample_rate: 16000
frame_duration_ms: 20
sample_width: 2
padding_ms: 200
padding_ratio: 0.9

@ -11,25 +11,23 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
record wave from the mic record wave from the mic
""" """
import asyncio
import json
import logging
import threading import threading
import pyaudio
import wave import wave
import logging from signal import SIGINT
import asyncio from signal import SIGTERM
import pyaudio
import websockets import websockets
import json
from signal import SIGINT, SIGTERM
class ASRAudioHandler(threading.Thread): class ASRAudioHandler(threading.Thread):
def __init__(self, def __init__(self, url="127.0.0.1", port=8091):
url="127.0.0.1",
port=8090):
threading.Thread.__init__(self) threading.Thread.__init__(self)
self.url = url self.url = url
self.port = port self.port = port
@ -56,12 +54,13 @@ class ASRAudioHandler(threading.Thread):
self._running = True self._running = True
self._frames = [] self._frames = []
p = pyaudio.PyAudio() p = pyaudio.PyAudio()
stream = p.open(format=self.format, stream = p.open(
format=self.format,
channels=self.channels, channels=self.channels,
rate=self.rate, rate=self.rate,
input=True, input=True,
frames_per_buffer=self.chunk) frames_per_buffer=self.chunk)
while(self._running): while (self._running):
data = stream.read(self.chunk) data = stream.read(self.chunk)
self._frames.append(data) self._frames.append(data)
self.data_backup.append(data) self.data_backup.append(data)
@ -97,11 +96,15 @@ class ASRAudioHandler(threading.Thread):
async with websockets.connect(self.url) as ws: async with websockets.connect(self.url) as ws:
# 发送开始指令 # 发送开始指令
audio_info = json.dumps({ audio_info = json.dumps(
{
"name": "test.wav", "name": "test.wav",
"signal": "start", "signal": "start",
"nbest": 5 "nbest": 5
}, sort_keys=True, indent=4, separators=(',', ': ')) },
sort_keys=True,
indent=4,
separators=(',', ': '))
await ws.send(audio_info) await ws.send(audio_info)
msg = await ws.recv() msg = await ws.recv()
logging.info("receive msg={}".format(msg)) logging.info("receive msg={}".format(msg))
@ -117,11 +120,15 @@ class ASRAudioHandler(threading.Thread):
except asyncio.CancelledError: except asyncio.CancelledError:
# quit # quit
# send finished # send finished
audio_info = json.dumps({ audio_info = json.dumps(
{
"name": "test.wav", "name": "test.wav",
"signal": "end", "signal": "end",
"nbest": 5 "nbest": 5
}, sort_keys=True, indent=4, separators=(',', ': ')) },
sort_keys=True,
indent=4,
separators=(',', ': '))
await ws.send(audio_info) await ws.send(audio_info)
msg = await ws.recv() msg = await ws.recv()
logging.info("receive msg={}".format(msg)) logging.info("receive msg={}".format(msg))
@ -141,7 +148,7 @@ if __name__ == "__main__":
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logging.info("asr websocket client start") logging.info("asr websocket client start")
handler = ASRAudioHandler("127.0.0.1", 8090) handler = ASRAudioHandler("127.0.0.1", 8091)
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
main_task = asyncio.ensure_future(handler.run()) main_task = asyncio.ensure_future(handler.run())
for signal in [SIGINT, SIGTERM]: for signal in [SIGINT, SIGTERM]:

@ -11,26 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#!/usr/bin/python #!/usr/bin/python
# -*- coding: UTF-8 -*- # -*- coding: UTF-8 -*-
import argparse import argparse
import logging import asyncio
import time
import os
import json import json
import wave import logging
import numpy as np import numpy as np
import asyncio
import websockets
import soundfile import soundfile
import websockets
class ASRAudioHandler: class ASRAudioHandler:
def __init__(self, def __init__(self, url="127.0.0.1", port=8090):
url="127.0.0.1",
port=8090):
self.url = url self.url = url
self.port = port self.port = port
self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr" self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr"
@ -42,13 +36,11 @@ class ASRAudioHandler:
chunk_size = 80 * 16 #80ms, sample_rate = 16kHz chunk_size = 80 * 16 #80ms, sample_rate = 16kHz
if (x_len - chunk_size) % chunk_stride != 0: if (x_len - chunk_size) % chunk_stride != 0:
padding_len_x = chunk_stride - (x_len - chunk_size padding_len_x = chunk_stride - (x_len - chunk_size) % chunk_stride
) % chunk_stride
else: else:
padding_len_x = 0 padding_len_x = 0
padding = np.zeros( padding = np.zeros((padding_len_x), dtype=samples.dtype)
(padding_len_x), dtype=samples.dtype)
padded_x = np.concatenate([samples, padding], axis=0) padded_x = np.concatenate([samples, padding], axis=0)
num_chunk = (x_len + padding_len_x - chunk_size) / chunk_stride + 1 num_chunk = (x_len + padding_len_x - chunk_size) / chunk_stride + 1
@ -68,11 +60,15 @@ class ASRAudioHandler:
async with websockets.connect(self.url) as ws: async with websockets.connect(self.url) as ws:
# server 端已经接收到 handshake 协议头 # server 端已经接收到 handshake 协议头
# 发送开始指令 # 发送开始指令
audio_info = json.dumps({ audio_info = json.dumps(
{
"name": "test.wav", "name": "test.wav",
"signal": "start", "signal": "start",
"nbest": 5 "nbest": 5
}, sort_keys=True, indent=4, separators=(',', ': ')) },
sort_keys=True,
indent=4,
separators=(',', ': '))
await ws.send(audio_info) await ws.send(audio_info)
msg = await ws.recv() msg = await ws.recv()
logging.info("receive msg={}".format(msg)) logging.info("receive msg={}".format(msg))
@ -84,11 +80,15 @@ class ASRAudioHandler:
logging.info("receive msg={}".format(msg)) logging.info("receive msg={}".format(msg))
# finished # finished
audio_info = json.dumps({ audio_info = json.dumps(
{
"name": "test.wav", "name": "test.wav",
"signal": "end", "signal": "end",
"nbest": 5 "nbest": 5
}, sort_keys=True, indent=4, separators=(',', ': ')) },
sort_keys=True,
indent=4,
separators=(',', ': '))
await ws.send(audio_info) await ws.send(audio_info)
msg = await ws.recv() msg = await ws.recv()
logging.info("receive msg={}".format(msg)) logging.info("receive msg={}".format(msg))
@ -97,7 +97,7 @@ class ASRAudioHandler:
def main(args): def main(args):
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logging.info("asr websocket client start") logging.info("asr websocket client start")
handler = ASRAudioHandler("127.0.0.1", 8090) handler = ASRAudioHandler("127.0.0.1", 8091)
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
loop.run_until_complete(handler.run(args.wavfile)) loop.run_until_complete(handler.run(args.wavfile))
logging.info("asr websocket client finished") logging.info("asr websocket client finished")

@ -12,16 +12,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import collections import collections
import logging
import webrtcvad import webrtcvad
class VADAudio(): class VADAudio():
def __init__(self, def __init__(self,
aggressiveness, aggressiveness=2,
rate, rate=16000,
frame_duration_ms, frame_duration_ms=20,
sample_width=2, sample_width=2,
padding_ms=200, padding_ms=200,
padding_ratio=0.9): padding_ratio=0.9):

@ -11,35 +11,39 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import base64
import traceback
from typing import Union
import random
import numpy as np
import json import json
import numpy as np
from fastapi import APIRouter from fastapi import APIRouter
from fastapi import WebSocket from fastapi import WebSocket
from fastapi import WebSocketDisconnect from fastapi import WebSocketDisconnect
from starlette.websockets import WebSocketState as WebSocketState from starlette.websockets import WebSocketState as WebSocketState
from paddlespeech.server.engine.asr.online.asr_engine import ASREngine
from paddlespeech.server.engine.engine_pool import get_engine_pool from paddlespeech.server.engine.engine_pool import get_engine_pool
from paddlespeech.server.utils.buffer import ChunkBuffer from paddlespeech.server.utils.buffer import ChunkBuffer
from paddlespeech.server.utils.vad import VADAudio from paddlespeech.server.utils.vad import VADAudio
router = APIRouter() router = APIRouter()
@router.websocket('/ws/asr') @router.websocket('/ws/asr')
async def websocket_endpoint(websocket: WebSocket): async def websocket_endpoint(websocket: WebSocket):
await websocket.accept() await websocket.accept()
engine_pool = get_engine_pool()
asr_engine = engine_pool['asr']
# init buffer # init buffer
chunk_buffer = ChunkBuffer(sample_width=2) chunk_buffer_conf = asr_engine.config.chunk_buffer_conf
chunk_buffer = ChunkBuffer(
sample_rate=chunk_buffer_conf['sample_rate'],
sample_width=chunk_buffer_conf['sample_width'])
# init vad # init vad
vad = VADAudio(2, 16000, 20) vad_conf = asr_engine.config.vad_conf
vad = VADAudio(
aggressiveness=vad_conf['aggressiveness'],
rate=vad_conf['sample_rate'],
frame_duration_ms=vad_conf['frame_duration_ms'])
try: try:
while True: while True:
@ -50,17 +54,11 @@ async def websocket_endpoint(websocket: WebSocket):
if "text" in message: if "text" in message:
message = json.loads(message["text"]) message = json.loads(message["text"])
if 'signal' not in message: if 'signal' not in message:
resp = { resp = {"status": "ok", "message": "no valid json data"}
"status": "ok",
"message": "no valid json data"
}
await websocket.send_json(resp) await websocket.send_json(resp)
if message['signal'] == 'start': if message['signal'] == 'start':
resp = { resp = {"status": "ok", "signal": "server_ready"}
"status": "ok",
"signal": "server_ready"
}
# do something at beginning here # do something at beginning here
await websocket.send_json(resp) await websocket.send_json(resp)
elif message['signal'] == 'end': elif message['signal'] == 'end':
@ -68,24 +66,19 @@ async def websocket_endpoint(websocket: WebSocket):
asr_engine = engine_pool['asr'] asr_engine = engine_pool['asr']
# reset single engine for a new connection # reset single engine for a new connection
asr_engine.reset() asr_engine.reset()
resp = { resp = {"status": "ok", "signal": "finished"}
"status": "ok",
"signal": "finished"
}
await websocket.send_json(resp) await websocket.send_json(resp)
break break
else: else:
resp = { resp = {"status": "ok", "message": "no valid json data"}
"status": "ok",
"message": "no valid json data"
}
await websocket.send_json(resp) await websocket.send_json(resp)
elif "bytes" in message: elif "bytes" in message:
message = message["bytes"] message = message["bytes"]
# vad for input bytes audio # vad for input bytes audio
vad.add_audio(message) vad.add_audio(message)
message = b''.join(f for f in vad.vad_collector() if f is not None) message = b''.join(f for f in vad.vad_collector()
if f is not None)
engine_pool = get_engine_pool() engine_pool = get_engine_pool()
asr_engine = engine_pool['asr'] asr_engine = engine_pool['asr']
@ -94,7 +87,8 @@ async def websocket_endpoint(websocket: WebSocket):
for frame in frames: for frame in frames:
samples = np.frombuffer(frame.bytes, dtype=np.int16) samples = np.frombuffer(frame.bytes, dtype=np.int16)
sample_rate = asr_engine.config.sample_rate sample_rate = asr_engine.config.sample_rate
x_chunk, x_chunk_lens = asr_engine.preprocess(samples, sample_rate) x_chunk, x_chunk_lens = asr_engine.preprocess(samples,
sample_rate)
asr_engine.run(x_chunk, x_chunk_lens) asr_engine.run(x_chunk, x_chunk_lens)
asr_results = asr_engine.postprocess() asr_results = asr_engine.postprocess()

Loading…
Cancel
Save