acs server, test=doc

pull/1906/head
xiongxinlei 2 years ago
parent a11dc53c1b
commit d94ab22e92

@ -0,0 +1,49 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from paddlespeech.cli.log import logger
from paddlespeech.server.utils.audio_handler import ASRHttpHandler
def main(args):
    """Post one audio file to the configured HTTP endpoint and log the reply.

    Args:
        args: Parsed command-line namespace. The attributes ``server_ip``,
            ``port``, ``endpoint`` and ``wavfile`` are read.
    """
    logger.info("asr http client start")

    client = ASRHttpHandler(
        server_ip=args.server_ip, port=args.port, endpoint=args.endpoint)

    # The request always declares the audio as "wav", 16000 and "zh";
    # only the file path comes from the command line.
    response = client.run(args.wavfile, "wav", 16000, "zh")
    logger.info(f"the final result: {response}")
if __name__ == "__main__":
    # Command-line options, registered in the order they appear in --help.
    cli_options = (
        ('--server_ip', dict(type=str, default='127.0.0.1', help='server ip')),
        ('--port', dict(type=int, default=8090, help='server port')),
        ("--wavfile", dict(
            action="store", help="wav file path ", default="./16_audio.wav")),
        ('--endpoint', dict(
            type=str, default='/paddlespeech/asr/search',
            help='server endpoint')),
    )

    parser = argparse.ArgumentParser(description="audio content search client")
    for flag, spec in cli_options:
        parser.add_argument(flag, **spec)

    main(parser.parse_args())

@ -0,0 +1,36 @@
# This is the parameter configuration file for PaddleSpeech Serving.
#################################################################################
# SERVER SETTING #
#################################################################################
host: 0.0.0.0
port: 8490
# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['acs_python']
# protocol = ['http'] (only one can be selected).
# http only supports the offline engine type.
protocol: 'http'
engine_list: ['acs_python']
#################################################################################
# ENGINE CONFIG #
#################################################################################
################################### ACS #########################################
################### acs task: engine_type: python #######################
acs_python:
task: acs
asr_protocol: 'websocket' # 'websocket'
offset: 1.0 # second
asr_server_ip: 127.0.0.1
asr_server_port: 8390
lang: 'zh'
word_list: "words.txt"
sample_rate: 16000
device: 'cpu' # set 'gpu:id' or 'cpu'

@ -0,0 +1,45 @@
# This is the parameter configuration file for PaddleSpeech Serving.
#################################################################################
# SERVER SETTING #
#################################################################################
host: 0.0.0.0
port: 8090
# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online']
# protocol = ['websocket'] (only one can be selected).
# websocket only supports the online engine type.
protocol: 'websocket'
engine_list: ['asr_online']
#################################################################################
# ENGINE CONFIG #
#################################################################################
################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
model_type: 'conformer_online_multicn'
am_model: # the pdmodel file of am static model [optional]
am_params: # the pdiparams file of am static model [optional]
lang: 'zh'
sample_rate: 16000
cfg_path:
decode_method:
force_yes: True
device: 'cpu' # cpu or gpu:id
am_predictor_conf:
device: # set 'gpu:id' or 'cpu'
switch_ir_optim: True
glog_info: False # True -> print glog
summary: True # False -> do not show predictor config
chunk_buffer_conf:
window_n: 7 # frame
shift_n: 4 # frame
window_ms: 25 # ms
shift_ms: 10 # ms
sample_rate: 16000
sample_width: 2

@ -0,0 +1,46 @@
# This is the parameter configuration file for PaddleSpeech Serving.
#################################################################################
# SERVER SETTING #
#################################################################################
host: 0.0.0.0
port: 8390
# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online']
# protocol = ['websocket'] (only one can be selected).
# websocket only supports the online engine type.
protocol: 'websocket'
engine_list: ['asr_online']
#################################################################################
# ENGINE CONFIG #
#################################################################################
################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
model_type: 'conformer_online_wenetspeech'
am_model: # the pdmodel file of am static model [optional]
am_params: # the pdiparams file of am static model [optional]
lang: 'zh'
sample_rate: 16000
cfg_path:
decode_method:
force_yes: True
device: 'cpu' # cpu or gpu:id
decode_method: "attention_rescoring"
am_predictor_conf:
device: # set 'gpu:id' or 'cpu'
switch_ir_optim: True
glog_info: False # True -> print glog
summary: True # False -> do not show predictor config
chunk_buffer_conf:
window_n: 7 # frame
shift_n: 4 # frame
window_ms: 25 # ms
shift_ms: 10 # ms
sample_rate: 16000
sample_width: 2

@ -4,7 +4,7 @@
# SERVER SETTING #
#################################################################################
host: 0.0.0.0
port: 8090
port: 8390
# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online']

@ -72,7 +72,7 @@ class ServerExecutor(BaseExecutor):
else:
raise Exception("unsupported protocol")
app.include_router(api_router)
logger.info("start to init the engine")
if not init_engine_pool(config):
return False

@ -0,0 +1,150 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import json
import os
import re
import paddle
import soundfile
import websocket
from paddlespeech.cli.log import logger
from paddlespeech.server.engine.base_engine import BaseEngine
class ACSEngine(BaseEngine):
    """Audio content search (ACS) engine.

    Sends audio to a streaming ASR server over a websocket and looks up the
    configured search words in the transcription the server returns.
    """

    def __init__(self):
        """The ACSEngine Engine
        """
        super(ACSEngine, self).__init__()
        logger.info("Create the ACSEngine Instance")
        self.word_list = []
        # Pre-declare the attributes that init() fills in, so the error path in
        # init() and the guard in get_asr_content() never hit AttributeError.
        self.device = None
        self.url = None

    def init(self, config: dict):
        """Init the ACSEngine Engine

        Args:
            config (dict): The server configuration. The attributes
                ``device``, ``word_list``, ``asr_server_ip`` and
                ``asr_server_port`` are read from it.

        Returns:
            bool: True when the engine is ready, False when the device could
                not be set.
        """
        logger.info("Init the acs engine")
        try:
            self.config = config
            if self.config.device:
                self.device = self.config.device
            else:
                self.device = paddle.get_device()

            paddle.set_device(self.device)
            logger.info(f"ACS Engine set the device: {self.device}")
        except Exception as e:
            # Exception (not BaseException) so that KeyboardInterrupt and
            # SystemExit still propagate instead of being reported as a
            # device failure.
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            logger.error("Initialize ACS server engine Failed on device: %s." %
                         (self.device))
            logger.error(e)
            return False

        self.read_search_words()

        self.url = "ws://" + self.config.asr_server_ip + ":" + str(
            self.config.asr_server_port) + "/paddlespeech/asr/streaming"

        logger.info("Init the acs engine successfully")
        return True

    def read_search_words(self):
        """Load the search words from the file named by ``config.word_list``.

        One word is read per line. Surrounding whitespace (including the
        trailing newline) is removed and blank lines are ignored, so that
        each entry can be used directly as a search pattern. When the
        setting is missing or the file does not exist, an error is logged
        and ``self.word_list`` is left unchanged.
        """
        word_list = self.config.word_list
        if word_list is None:
            logger.error(
                "No word list file in config, please set the word list parameter"
            )
            return

        if not os.path.exists(word_list):
            logger.error("Please input correct word list file")
            return

        with open(word_list, 'r') as fp:
            stripped = (line.strip() for line in fp)
            self.word_list = [word for word in stripped if word]

        logger.info(f"word list: {self.word_list}")

    @staticmethod
    def _signal_message(signal):
        """Serialize the control message sent with the given ``signal``."""
        return json.dumps(
            {
                "name": "test.wav",
                "signal": signal,
                "nbest": 1
            },
            sort_keys=True,
            indent=4,
            separators=(',', ': '))

    def get_asr_content(self, audio_data):
        """Send the audio to the ASR server and return its final message.

        Args:
            audio_data: File path or file-like object accepted by
                ``soundfile.read``.

        Returns:
            The JSON-decoded reply to the "end" signal, or ``""`` when no
            server url is configured.
        """
        logger.info("send a message to the server")
        if self.url is None:
            logger.error("No asr server, please input valid ip and port")
            return ""

        ws = websocket.WebSocket()
        ws.connect(self.url)
        try:
            # 1. send the start signal
            ws.send(self._signal_message("start"))
            msg = ws.recv()
            logger.info("client receive msg={}".format(msg))

            # 2. send the total audio data in a single binary frame
            samples, _ = soundfile.read(audio_data, dtype='int16')
            ws.send_binary(samples.tobytes())
            msg = json.loads(ws.recv())
            logger.info(f"audio result: {msg}")

            # 3. send the end signal and read the final result
            logger.info("send the end signal")
            ws.send(self._signal_message("end"))
            msg = json.loads(ws.recv())
            logger.info(f"the final result: {msg}")
        finally:
            # Release the connection even when a send/recv/decode step fails.
            ws.close()

        return msg

    def get_macthed_word(self, msg):
        """Log the start and end time of every search word found in ``msg``.

        Args:
            msg (dict): ASR reply; ``msg['result']`` is the transcription and
                ``msg['times']`` is indexed by character position of the
                transcription (assumes one entry with 'bg'/'ed' keys per
                character -- TODO confirm against the ASR server).
        """
        asr_result = msg['result']
        time_stamp = msg['times']

        for w in self.word_list:
            # Each word is used as a regular-expression pattern.
            for m in re.finditer(w, asr_result):
                if m.end(0) == m.start(0):
                    # A zero-length match covers no character, so it has no
                    # time span (and would index time_stamp out of range).
                    continue
                start = time_stamp[m.start(0)]['bg']
                end = time_stamp[m.end(0) - 1]['ed']
                logger.info(f'start: {start}, end: {end}')

    def run(self, audio_data):
        """Run the audio content search on raw audio bytes.

        Args:
            audio_data (bytes): Content of an audio file.
        """
        logger.info("start to process the audio content search")
        msg = self.get_asr_content(io.BytesIO(audio_data))
        if not msg:
            # get_asr_content() returns "" when no ASR server is configured;
            # there is nothing to search in that case.
            return

        self.get_macthed_word(msg)

@ -52,5 +52,8 @@ class EngineFactory(object):
elif engine_name.lower() == 'vector' and engine_type.lower() == 'python':
from paddlespeech.server.engine.vector.python.vector_engine import VectorEngine
return VectorEngine()
elif engine_name.lower() == 'acs' and engine_type.lower() == 'python':
from paddlespeech.server.engine.acs.python.acs_engine import ACSEngine
return ACSEngine()
else:
return None

@ -34,6 +34,7 @@ def init_engine_pool(config) -> bool:
engine_type = engine_and_type.split("_")[1]
ENGINE_POOL[engine] = EngineFactory.get_engine(
engine_name=engine, engine_type=engine_type)
if not ENGINE_POOL[engine].init(config=config[engine_and_type]):
return False

@ -22,6 +22,7 @@ from paddlespeech.server.restful.cls_api import router as cls_router
from paddlespeech.server.restful.text_api import router as text_router
from paddlespeech.server.restful.tts_api import router as tts_router
from paddlespeech.server.restful.vector_api import router as vec_router
from paddlespeech.server.restful.acs_api import router as acs_router
_router = APIRouter()
@ -45,6 +46,8 @@ def setup_router(api_list: List):
_router.include_router(text_router)
elif api_name.lower() == 'vector':
_router.include_router(vec_router)
elif api_name.lower() == 'acs':
_router.include_router(acs_router)
else:
logger.error(
f"PaddleSpeech has not support such service: {api_name}")

@ -96,7 +96,7 @@ class ASRWsAudioHandler:
self.punc_server = TextHttpHandler(punc_server_ip, punc_server_port)
logger.info(f"endpoint: {self.url}")
def read_wave(self, wavfile_path: str):
def read_wave(self, wavfile_path):
"""read the audio file from specific wavfile path
Args:
@ -129,7 +129,7 @@ class ASRWsAudioHandler:
x_chunk = padded_x[start:end]
yield x_chunk
async def run(self, wavfile_path: str):
async def run(self, wavfile_path):
"""Send a audio file to online server
Args:
@ -205,7 +205,7 @@ class ASRWsAudioHandler:
class ASRHttpHandler:
def __init__(self, server_ip=None, port=None):
def __init__(self, server_ip=None, port=None, endpoint="/paddlespeech/asr"):
"""The ASR client http request
Args:
@ -219,7 +219,7 @@ class ASRHttpHandler:
self.url = None
else:
self.url = 'http://' + self.server_ip + ":" + str(
self.port) + '/paddlespeech/asr'
self.port) + endpoint
logger.info(f"endpoint: {self.url}")
def run(self, input, audio_format, sample_rate, lang):
@ -248,7 +248,7 @@ class ASRHttpHandler:
}
res = requests.post(url=self.url, data=json.dumps(data))
return res.json()

@ -12,15 +12,24 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import base64
from typing import Union
from fastapi import APIRouter
from fastapi import WebSocket
import soundfile
import io
from fastapi import WebSocketDisconnect
from starlette.websockets import WebSocketState as WebSocketState
from paddlespeech.cli.log import logger
from paddlespeech.server.engine.asr.online.asr_engine import PaddleASRConnectionHanddler
from paddlespeech.server.engine.engine_pool import get_engine_pool
from paddlespeech.server.restful.response import ASRResponse
from paddlespeech.server.restful.response import ErrorResponse
from paddlespeech.server.restful.request import ASRRequest
from paddlespeech.server.utils.exception import ServerBaseException
from paddlespeech.server.utils.errors import failed_response
from paddlespeech.server.utils.errors import ErrorCode
router = APIRouter()
@ -106,5 +115,56 @@ async def websocket_endpoint(websocket: WebSocket):
# if the engine create the vad instance, this connection will have many period results
resp = {'result': asr_results}
await websocket.send_json(resp)
except WebSocketDisconnect:
pass
except WebSocketDisconnect as e:
logger.error(e)
# @router.post(
# "/paddlespeech/asr/search/", response_model=Union[ASRResponse, ErrorResponse])
# def asr(request_body: ASRRequest):
# """asr api
# Args:
# request_body (ASRRequest): [description]
# Returns:
# json: [description]
# """
# try:
# audio_data = base64.b64decode(request_body.audio)
# # get single engine from engine pool
# engine_pool = get_engine_pool()
# asr_engine = engine_pool['asr']
# samples, sample_rate = soundfile.read(io.BytesIO(audio_data), dtype='int16')
# # print(samples.shape)
# # print(sample_rate)
# connection_handler = PaddleASRConnectionHanddler(asr_engine)
# connection_handler.extract_feat(samples)
# connection_handler.decode(is_finished=True)
# asr_results = connection_handler.rescoring()
# asr_results = connection_handler.get_result()
# word_time_stamp = connection_handler.get_word_time_stamp()
# response = {
# "success": True,
# "code": 200,
# "message": {
# "description": "success"
# },
# "result": {
# "transcription": asr_results,
# "times": word_time_stamp
# }
# }
# except ServerBaseException as e:
# response = failed_response(e.error_code, e.msg)
# except BaseException as e:
# response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
# print(e)
# return response
Loading…
Cancel
Save