fixed comments, test=doc

pull/1627/head
WilliamZhang06 2 years ago
parent d847fe29cf
commit 2ec8d608bf

@ -3,18 +3,15 @@
#################################################################################
# SERVER SETTING #
#################################################################################
host: 0.0.0.0
host: 127.0.0.1
port: 8090
# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference']
# protocol: 'http'
# engine_list: ['asr_python', 'tts_python', 'cls_python']
# websocket, http (only choose one). websocket only supports online engine type.
protocol: 'websocket'
engine_list: ['asr_online']
# protocol = ['websocket', 'http'] (only one can be selected).
# http only supports offline engine type.
protocol: 'http'
engine_list: ['asr_python', 'tts_python', 'cls_python']
#################################################################################

@ -0,0 +1,51 @@
# This is the parameter configuration file for PaddleSpeech Serving.
#################################################################################
# SERVER SETTING #
#################################################################################
host: 0.0.0.0
port: 8091
# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online', 'tts_online']
# protocol = ['websocket', 'http'] (only one can be selected).
# websocket only supports online engine type.
protocol: 'websocket'
engine_list: ['asr_online']
#################################################################################
# ENGINE CONFIG #
#################################################################################
################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
model_type: 'deepspeech2online_aishell'
am_model: # the pdmodel file of am static model [optional]
am_params: # the pdiparams file of am static model [optional]
lang: 'zh'
sample_rate: 16000
cfg_path:
decode_method:
force_yes: True
am_predictor_conf:
device: # set 'gpu:id' or 'cpu'
switch_ir_optim: True
glog_info: False # True -> print glog
summary: True # False -> do not show predictor config
chunk_buffer_conf:
frame_duration_ms: 80
shift_ms: 40
sample_rate: 16000
sample_width: 2
vad_conf:
aggressiveness: 2
sample_rate: 16000
frame_duration_ms: 20
sample_width: 2
padding_ms: 200
padding_ratio: 0.9

@ -11,25 +11,23 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
record wave from the mic
"""
import asyncio
import json
import logging
import threading
import pyaudio
import wave
import logging
import asyncio
from signal import SIGINT
from signal import SIGTERM
import pyaudio
import websockets
import json
from signal import SIGINT, SIGTERM
class ASRAudioHandler(threading.Thread):
def __init__(self,
url="127.0.0.1",
port=8090):
def __init__(self, url="127.0.0.1", port=8091):
threading.Thread.__init__(self)
self.url = url
self.port = port
@ -56,12 +54,13 @@ class ASRAudioHandler(threading.Thread):
self._running = True
self._frames = []
p = pyaudio.PyAudio()
stream = p.open(format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
frames_per_buffer=self.chunk)
while(self._running):
stream = p.open(
format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
frames_per_buffer=self.chunk)
while (self._running):
data = stream.read(self.chunk)
self._frames.append(data)
self.data_backup.append(data)
@ -97,11 +96,15 @@ class ASRAudioHandler(threading.Thread):
async with websockets.connect(self.url) as ws:
# 发送开始指令
audio_info = json.dumps({
"name": "test.wav",
"signal": "start",
"nbest": 5
}, sort_keys=True, indent=4, separators=(',', ': '))
audio_info = json.dumps(
{
"name": "test.wav",
"signal": "start",
"nbest": 5
},
sort_keys=True,
indent=4,
separators=(',', ': '))
await ws.send(audio_info)
msg = await ws.recv()
logging.info("receive msg={}".format(msg))
@ -117,11 +120,15 @@ class ASRAudioHandler(threading.Thread):
except asyncio.CancelledError:
# quit
# send finished
audio_info = json.dumps({
"name": "test.wav",
"signal": "end",
"nbest": 5
}, sort_keys=True, indent=4, separators=(',', ': '))
audio_info = json.dumps(
{
"name": "test.wav",
"signal": "end",
"nbest": 5
},
sort_keys=True,
indent=4,
separators=(',', ': '))
await ws.send(audio_info)
msg = await ws.recv()
logging.info("receive msg={}".format(msg))
@ -141,7 +148,7 @@ if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
logging.info("asr websocket client start")
handler = ASRAudioHandler("127.0.0.1", 8090)
handler = ASRAudioHandler("127.0.0.1", 8091)
loop = asyncio.get_event_loop()
main_task = asyncio.ensure_future(handler.run())
for signal in [SIGINT, SIGTERM]:

@ -11,26 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import argparse
import logging
import time
import os
import asyncio
import json
import wave
import logging
import numpy as np
import asyncio
import websockets
import soundfile
import websockets
class ASRAudioHandler:
def __init__(self,
url="127.0.0.1",
port=8090):
def __init__(self, url="127.0.0.1", port=8090):
self.url = url
self.port = port
self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr"
@ -38,17 +32,15 @@ class ASRAudioHandler:
def read_wave(self, wavfile_path: str):
samples, sample_rate = soundfile.read(wavfile_path, dtype='int16')
x_len = len(samples)
chunk_stride = 40 * 16 #40ms, sample_rate = 16kHz
chunk_size = 80 * 16 #80ms, sample_rate = 16kHz
chunk_stride = 40 * 16 #40ms, sample_rate = 16kHz
chunk_size = 80 * 16 #80ms, sample_rate = 16kHz
if (x_len - chunk_size) % chunk_stride != 0:
padding_len_x = chunk_stride - (x_len - chunk_size
) % chunk_stride
padding_len_x = chunk_stride - (x_len - chunk_size) % chunk_stride
else:
padding_len_x = 0
padding = np.zeros(
(padding_len_x), dtype=samples.dtype)
padding = np.zeros((padding_len_x), dtype=samples.dtype)
padded_x = np.concatenate([samples, padding], axis=0)
num_chunk = (x_len + padding_len_x - chunk_size) / chunk_stride + 1
@ -68,11 +60,15 @@ class ASRAudioHandler:
async with websockets.connect(self.url) as ws:
# server 端已经接收到 handshake 协议头
# 发送开始指令
audio_info = json.dumps({
"name": "test.wav",
"signal": "start",
"nbest": 5
}, sort_keys=True, indent=4, separators=(',', ': '))
audio_info = json.dumps(
{
"name": "test.wav",
"signal": "start",
"nbest": 5
},
sort_keys=True,
indent=4,
separators=(',', ': '))
await ws.send(audio_info)
msg = await ws.recv()
logging.info("receive msg={}".format(msg))
@ -84,11 +80,15 @@ class ASRAudioHandler:
logging.info("receive msg={}".format(msg))
# finished
audio_info = json.dumps({
"name": "test.wav",
"signal": "end",
"nbest": 5
}, sort_keys=True, indent=4, separators=(',', ': '))
audio_info = json.dumps(
{
"name": "test.wav",
"signal": "end",
"nbest": 5
},
sort_keys=True,
indent=4,
separators=(',', ': '))
await ws.send(audio_info)
msg = await ws.recv()
logging.info("receive msg={}".format(msg))
@ -97,7 +97,7 @@ class ASRAudioHandler:
def main(args):
logging.basicConfig(level=logging.INFO)
logging.info("asr websocket client start")
handler = ASRAudioHandler("127.0.0.1", 8090)
handler = ASRAudioHandler("127.0.0.1", 8091)
loop = asyncio.get_event_loop()
loop.run_until_complete(handler.run(args.wavfile))
logging.info("asr websocket client finished")

@ -12,16 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import logging
import webrtcvad
class VADAudio():
def __init__(self,
aggressiveness,
rate,
frame_duration_ms,
aggressiveness=2,
rate=16000,
frame_duration_ms=20,
sample_width=2,
padding_ms=200,
padding_ratio=0.9):

@ -11,35 +11,39 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import traceback
from typing import Union
import random
import numpy as np
import json
import numpy as np
from fastapi import APIRouter
from fastapi import WebSocket
from fastapi import WebSocketDisconnect
from starlette.websockets import WebSocketState as WebSocketState
from paddlespeech.server.engine.asr.online.asr_engine import ASREngine
from paddlespeech.server.engine.engine_pool import get_engine_pool
from paddlespeech.server.utils.buffer import ChunkBuffer
from paddlespeech.server.utils.vad import VADAudio
router = APIRouter()
@router.websocket('/ws/asr')
async def websocket_endpoint(websocket: WebSocket):
await websocket.accept()
engine_pool = get_engine_pool()
asr_engine = engine_pool['asr']
# init buffer
chunk_buffer = ChunkBuffer(sample_width=2)
chunk_buffer_conf = asr_engine.config.chunk_buffer_conf
chunk_buffer = ChunkBuffer(
sample_rate=chunk_buffer_conf['sample_rate'],
sample_width=chunk_buffer_conf['sample_width'])
# init vad
vad = VADAudio(2, 16000, 20)
vad_conf = asr_engine.config.vad_conf
vad = VADAudio(
aggressiveness=vad_conf['aggressiveness'],
rate=vad_conf['sample_rate'],
frame_duration_ms=vad_conf['frame_duration_ms'])
try:
while True:
@ -50,17 +54,11 @@ async def websocket_endpoint(websocket: WebSocket):
if "text" in message:
message = json.loads(message["text"])
if 'signal' not in message:
resp = {
"status": "ok",
"message": "no valid json data"
}
resp = {"status": "ok", "message": "no valid json data"}
await websocket.send_json(resp)
if message['signal'] == 'start':
resp = {
"status": "ok",
"signal": "server_ready"
}
resp = {"status": "ok", "signal": "server_ready"}
# do something at beginning here
await websocket.send_json(resp)
elif message['signal'] == 'end':
@ -68,24 +66,19 @@ async def websocket_endpoint(websocket: WebSocket):
asr_engine = engine_pool['asr']
# reset single engine for a new connection
asr_engine.reset()
resp = {
"status": "ok",
"signal": "finished"
}
resp = {"status": "ok", "signal": "finished"}
await websocket.send_json(resp)
break
else:
resp = {
"status": "ok",
"message": "no valid json data"
}
resp = {"status": "ok", "message": "no valid json data"}
await websocket.send_json(resp)
elif "bytes" in message:
message = message["bytes"]
# vad for input bytes audio
vad.add_audio(message)
message = b''.join(f for f in vad.vad_collector() if f is not None)
message = b''.join(f for f in vad.vad_collector()
if f is not None)
engine_pool = get_engine_pool()
asr_engine = engine_pool['asr']
@ -94,7 +87,8 @@ async def websocket_endpoint(websocket: WebSocket):
for frame in frames:
samples = np.frombuffer(frame.bytes, dtype=np.int16)
sample_rate = asr_engine.config.sample_rate
x_chunk, x_chunk_lens = asr_engine.preprocess(samples, sample_rate)
x_chunk, x_chunk_lens = asr_engine.preprocess(samples,
sample_rate)
asr_engine.run(x_chunk, x_chunk_lens)
asr_results = asr_engine.postprocess()

Loading…
Cancel
Save