From d94ab22e925e195254d02f7706d6c536479cb9bd Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Sat, 14 May 2022 12:06:02 +0800 Subject: [PATCH] acs server, test=doc --- demos/audio_content_search/acs_clinet.py | 49 ++++++ .../conf/acs_application.yaml | 36 +++++ .../conf/ws_conformer_application.yaml | 45 ++++++ .../ws_conformer_wenetspeech_application.yaml | 46 ++++++ demos/audio_content_search/words.txt | 2 + .../ws_conformer_wenetspeech_application.yaml | 2 +- .../server/bin/paddlespeech_server.py | 2 +- paddlespeech/server/engine/acs/__init__.py | 0 .../server/engine/acs/python/__init__.py | 0 .../server/engine/acs/python/acs_engine.py | 150 ++++++++++++++++++ paddlespeech/server/engine/engine_factory.py | 3 + paddlespeech/server/engine/engine_pool.py | 1 + paddlespeech/server/restful/api.py | 3 + paddlespeech/server/utils/audio_handler.py | 10 +- paddlespeech/server/ws/asr_api.py | 68 +++++++- 15 files changed, 406 insertions(+), 11 deletions(-) create mode 100644 demos/audio_content_search/acs_clinet.py create mode 100644 demos/audio_content_search/conf/acs_application.yaml create mode 100644 demos/audio_content_search/conf/ws_conformer_application.yaml create mode 100644 demos/audio_content_search/conf/ws_conformer_wenetspeech_application.yaml create mode 100644 demos/audio_content_search/words.txt create mode 100644 paddlespeech/server/engine/acs/__init__.py create mode 100644 paddlespeech/server/engine/acs/python/__init__.py create mode 100644 paddlespeech/server/engine/acs/python/acs_engine.py diff --git a/demos/audio_content_search/acs_clinet.py b/demos/audio_content_search/acs_clinet.py new file mode 100644 index 00000000..11f99aca --- /dev/null +++ b/demos/audio_content_search/acs_clinet.py @@ -0,0 +1,49 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse + +from paddlespeech.cli.log import logger +from paddlespeech.server.utils.audio_handler import ASRHttpHandler + + +def main(args): + logger.info("asr http client start") + audio_format = "wav" + sample_rate = 16000 + lang = "zh" + handler = ASRHttpHandler( + server_ip=args.server_ip, port=args.port, endpoint=args.endpoint) + res = handler.run(args.wavfile, audio_format, sample_rate, lang) + # res = res['result'] + logger.info(f"the final result: {res}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="audio content search client") + parser.add_argument( + '--server_ip', type=str, default='127.0.0.1', help='server ip') + parser.add_argument('--port', type=int, default=8090, help='server port') + parser.add_argument( + "--wavfile", + action="store", + help="wav file path ", + default="./16_audio.wav") + parser.add_argument( + '--endpoint', + type=str, + default='/paddlespeech/asr/search', + help='server endpoint') + args = parser.parse_args() + + main(args) diff --git a/demos/audio_content_search/conf/acs_application.yaml b/demos/audio_content_search/conf/acs_application.yaml new file mode 100644 index 00000000..010661e3 --- /dev/null +++ b/demos/audio_content_search/conf/acs_application.yaml @@ -0,0 +1,36 @@ +# This is the parameter configuration file for PaddleSpeech Serving. + +################################################################################# +# SERVER SETTING # +################################################################################# +host: 0.0.0.0 +port: 8490 + +# The task format in the engin_list is: _ +# task choices = ['acs_python'] +# protocol = ['http'] (only one can be selected). +# http only support offline engine type. +protocol: 'http' +engine_list: ['acs_python'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# + +################################### Text ######################################### +################### acs task: engine_type: python ####################### +acs_python: + task: acs + asr_protocol: 'websocket' # 'websocket' + offset: 1.0 # second + asr_server_ip: 127.0.0.1 + asr_server_port: 8390 + lang: 'zh' + word_list: "words.txt" + sample_rate: 16000 + device: 'cpu' # set 'gpu:id' or 'cpu' + + + + diff --git a/demos/audio_content_search/conf/ws_conformer_application.yaml b/demos/audio_content_search/conf/ws_conformer_application.yaml new file mode 100644 index 00000000..2affde07 --- /dev/null +++ b/demos/audio_content_search/conf/ws_conformer_application.yaml @@ -0,0 +1,45 @@ +# This is the parameter configuration file for PaddleSpeech Serving. + +################################################################################# +# SERVER SETTING # +################################################################################# +host: 0.0.0.0 +port: 8090 + +# The task format in the engin_list is: _ +# task choices = ['asr_online'] +# protocol = ['websocket'] (only one can be selected). +# websocket only support online engine type. +protocol: 'websocket' +engine_list: ['asr_online'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# + +################################### ASR ######################################### +################### speech task: asr; engine_type: online ####################### +asr_online: + model_type: 'conformer_online_multicn' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + device: 'cpu' # cpu or gpu:id + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + chunk_buffer_conf: + window_n: 7 # frame + shift_n: 4 # frame + window_ms: 25 # ms + shift_ms: 10 # ms + sample_rate: 16000 + sample_width: 2 diff --git a/demos/audio_content_search/conf/ws_conformer_wenetspeech_application.yaml b/demos/audio_content_search/conf/ws_conformer_wenetspeech_application.yaml new file mode 100644 index 00000000..c23680bd --- /dev/null +++ b/demos/audio_content_search/conf/ws_conformer_wenetspeech_application.yaml @@ -0,0 +1,46 @@ +# This is the parameter configuration file for PaddleSpeech Serving. + +################################################################################# +# SERVER SETTING # +################################################################################# +host: 0.0.0.0 +port: 8390 + +# The task format in the engin_list is: _ +# task choices = ['asr_online'] +# protocol = ['websocket'] (only one can be selected). +# websocket only support online engine type. +protocol: 'websocket' +engine_list: ['asr_online'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# + +################################### ASR ######################################### +################### speech task: asr; engine_type: online ####################### +asr_online: + model_type: 'conformer_online_wenetspeech' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + device: 'cpu' # cpu or gpu:id + decode_method: "attention_rescoring" + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + chunk_buffer_conf: + window_n: 7 # frame + shift_n: 4 # frame + window_ms: 25 # ms + shift_ms: 10 # ms + sample_rate: 16000 + sample_width: 2 diff --git a/demos/audio_content_search/words.txt b/demos/audio_content_search/words.txt new file mode 100644 index 00000000..25510eb4 --- /dev/null +++ b/demos/audio_content_search/words.txt @@ -0,0 +1,2 @@ +我 +康 \ No newline at end of file diff --git a/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml b/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml index e9a89c19..c23680bd 100644 --- a/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml +++ b/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml @@ -4,7 +4,7 @@ # SERVER SETTING # ################################################################################# host: 0.0.0.0 -port: 8090 +port: 8390 # The task format in the engin_list is: _ # task choices = ['asr_online'] diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py index db92f179..09047e81 100644 --- a/paddlespeech/server/bin/paddlespeech_server.py +++ b/paddlespeech/server/bin/paddlespeech_server.py @@ -72,7 +72,7 @@ class ServerExecutor(BaseExecutor): else: raise Exception("unsupported protocol") app.include_router(api_router) - + logger.info("start to init the engine") if not init_engine_pool(config): return False diff --git a/paddlespeech/server/engine/acs/__init__.py b/paddlespeech/server/engine/acs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/paddlespeech/server/engine/acs/python/__init__.py b/paddlespeech/server/engine/acs/python/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/paddlespeech/server/engine/acs/python/acs_engine.py b/paddlespeech/server/engine/acs/python/acs_engine.py new file mode 100644 index 00000000..42cdbb0a --- /dev/null +++ b/paddlespeech/server/engine/acs/python/acs_engine.py @@ -0,0 +1,150 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import io +import json +import os +import re + +import paddle +import soundfile +import websocket + +from paddlespeech.cli.log import logger +from paddlespeech.server.engine.base_engine import BaseEngine + + +class ACSEngine(BaseEngine): + def __init__(self): + """The ACSEngine Engine + """ + super(ACSEngine, self).__init__() + logger.info("Create the ACSEngine Instance") + self.word_list = [] + + def init(self, config: dict): + """Init the ACSEngine Engine + + Args: + config (dict): The server configuation + + Returns: + bool: The engine instance flag + """ + logger.info("Init the acs engine") + try: + self.config = config + if self.config.device: + self.device = self.config.device + else: + self.device = paddle.get_device() + + paddle.set_device(self.device) + logger.info(f"ACS Engine set the device: {self.device}") + + except BaseException as e: + logger.error( + "Set device failed, please check if device is already used and the parameter 'device' in the yaml file" + ) + logger.error("Initialize Text server engine Failed on device: %s." % + (self.device)) + return False + + self.read_search_words() + + self.url = "ws://" + self.config.asr_server_ip + ":" + str( + self.config.asr_server_port) + "/paddlespeech/asr/streaming" + + logger.info("Init the acs engine successfully") + return True + + def read_search_words(self): + word_list = self.config.word_list + if word_list is None: + logger.error( + "No word list file in config, please set the word list parameter" + ) + return + + if not os.path.exists(word_list): + logger.error("Please input correct word list file") + return + + with open(word_list, 'r') as fp: + self.word_list = fp.readlines() + + logger.info(f"word list: {self.word_list}") + + def get_asr_content(self, audio_data): + logger.info("send a message to the server") + if self.url is None: + logger.error("No asr server, please input valid ip and port") + return "" + ws = websocket.WebSocket() + ws.connect(self.url) + # with websocket.WebSocket.connect(self.url) as ws: + audio_info = json.dumps( + { + "name": "test.wav", + "signal": "start", + "nbest": 1 + }, + sort_keys=True, + indent=4, + separators=(',', ': ')) + ws.send(audio_info) + msg = ws.recv() + logger.info("client receive msg={}".format(msg)) + + # send the total audio data + samples, sample_rate = soundfile.read(audio_data, dtype='int16') + ws.send_binary(samples.tobytes()) + msg = ws.recv() + msg = json.loads(msg) + logger.info(f"audio result: {msg}") + + # 3. send chunk audio data to engine + logger.info("send the end signal") + audio_info = json.dumps( + { + "name": "test.wav", + "signal": "end", + "nbest": 1 + }, + sort_keys=True, + indent=4, + separators=(',', ': ')) + ws.send(audio_info) + msg = ws.recv() + msg = json.loads(msg) + + logger.info(f"the final result: {msg}") + ws.close() + + return msg + + def get_macthed_word(self, msg): + asr_result = msg['result'] + time_stamp = msg['times'] + + for w in self.word_list: + for m in re.finditer(w, asr_result): + start = time_stamp[m.start(0)]['bg'] + end = time_stamp[m.end(0) - 1]['ed'] + logger.info(f'start: {start}, end: {end}') + + def run(self, audio_data): + logger.info("start to process the audio content search") + msg = self.get_asr_content(io.BytesIO(audio_data)) + + self.get_macthed_word(msg) diff --git a/paddlespeech/server/engine/engine_factory.py b/paddlespeech/server/engine/engine_factory.py index 6cf95d75..5fdaacce 100644 --- a/paddlespeech/server/engine/engine_factory.py +++ b/paddlespeech/server/engine/engine_factory.py @@ -52,5 +52,8 @@ class EngineFactory(object): elif engine_name.lower() == 'vector' and engine_type.lower() == 'python': from paddlespeech.server.engine.vector.python.vector_engine import VectorEngine return VectorEngine() + elif engine_name.lower() == 'acs' and engine_type.lower() == 'python': + from paddlespeech.server.engine.acs.python.acs_engine import ACSEngine + return ACSEngine() else: return None diff --git a/paddlespeech/server/engine/engine_pool.py b/paddlespeech/server/engine/engine_pool.py index 9de73567..5300303f 100644 --- a/paddlespeech/server/engine/engine_pool.py +++ b/paddlespeech/server/engine/engine_pool.py @@ -34,6 +34,7 @@ def init_engine_pool(config) -> bool: engine_type = engine_and_type.split("_")[1] ENGINE_POOL[engine] = EngineFactory.get_engine( engine_name=engine, engine_type=engine_type) + if not ENGINE_POOL[engine].init(config=config[engine_and_type]): return False diff --git a/paddlespeech/server/restful/api.py b/paddlespeech/server/restful/api.py index 63f865e8..1c2dd281 100644 --- a/paddlespeech/server/restful/api.py +++ b/paddlespeech/server/restful/api.py @@ -22,6 +22,7 @@ from paddlespeech.server.restful.cls_api import router as cls_router from paddlespeech.server.restful.text_api import router as text_router from paddlespeech.server.restful.tts_api import router as tts_router from paddlespeech.server.restful.vector_api import router as vec_router +from paddlespeech.server.restful.acs_api import router as acs_router _router = APIRouter() @@ -45,6 +46,8 @@ def setup_router(api_list: List): _router.include_router(text_router) elif api_name.lower() == 'vector': _router.include_router(vec_router) + elif api_name.lower() == 'acs': + _router.include_router(acs_router) else: logger.error( f"PaddleSpeech has not support such service: {api_name}") diff --git a/paddlespeech/server/utils/audio_handler.py b/paddlespeech/server/utils/audio_handler.py index b85cf485..0fcdd08a 100644 --- a/paddlespeech/server/utils/audio_handler.py +++ b/paddlespeech/server/utils/audio_handler.py @@ -96,7 +96,7 @@ class ASRWsAudioHandler: self.punc_server = TextHttpHandler(punc_server_ip, punc_server_port) logger.info(f"endpoint: {self.url}") - def read_wave(self, wavfile_path: str): + def read_wave(self, wavfile_path): """read the audio file from specific wavfile path Args: @@ -129,7 +129,7 @@ class ASRWsAudioHandler: x_chunk = padded_x[start:end] yield x_chunk - async def run(self, wavfile_path: str): + async def run(self, wavfile_path): """Send a audio file to online server Args: @@ -205,7 +205,7 @@ class ASRWsAudioHandler: class ASRHttpHandler: - def __init__(self, server_ip=None, port=None): + def __init__(self, server_ip=None, port=None, endpoint="/paddlespeech/asr"): """The ASR client http request Args: @@ -219,7 +219,7 @@ class ASRHttpHandler: self.url = None else: self.url = 'http://' + self.server_ip + ":" + str( - self.port) + '/paddlespeech/asr' + self.port) + endpoint logger.info(f"endpoint: {self.url}") def run(self, input, audio_format, sample_rate, lang): @@ -248,7 +248,7 @@ class ASRHttpHandler: } res = requests.post(url=self.url, data=json.dumps(data)) - + return res.json() diff --git a/paddlespeech/server/ws/asr_api.py b/paddlespeech/server/ws/asr_api.py index 0f7dcddd..bf6e912c 100644 --- a/paddlespeech/server/ws/asr_api.py +++ b/paddlespeech/server/ws/asr_api.py @@ -12,15 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. import json - +import base64 +from typing import Union from fastapi import APIRouter from fastapi import WebSocket +import soundfile +import io from fastapi import WebSocketDisconnect from starlette.websockets import WebSocketState as WebSocketState +from paddlespeech.cli.log import logger from paddlespeech.server.engine.asr.online.asr_engine import PaddleASRConnectionHanddler from paddlespeech.server.engine.engine_pool import get_engine_pool - +from paddlespeech.server.restful.response import ASRResponse +from paddlespeech.server.restful.response import ErrorResponse +from paddlespeech.server.restful.request import ASRRequest +from paddlespeech.server.utils.exception import ServerBaseException +from paddlespeech.server.utils.errors import failed_response +from paddlespeech.server.utils.errors import ErrorCode router = APIRouter() @@ -106,5 +115,56 @@ async def websocket_endpoint(websocket: WebSocket): # if the engine create the vad instance, this connection will have many period results resp = {'result': asr_results} await websocket.send_json(resp) - except WebSocketDisconnect: - pass + except WebSocketDisconnect as e: + logger.error(e) + + +# @router.post( +# "/paddlespeech/asr/search/", response_model=Union[ASRResponse, ErrorResponse]) +# def asr(request_body: ASRRequest): +# """asr api + +# Args: +# request_body (ASRRequest): [description] + +# Returns: +# json: [description] +# """ +# try: +# audio_data = base64.b64decode(request_body.audio) + +# # get single engine from engine pool +# engine_pool = get_engine_pool() +# asr_engine = engine_pool['asr'] + +# samples, sample_rate = soundfile.read(io.BytesIO(audio_data), dtype='int16') +# # print(samples.shape) +# # print(sample_rate) +# connection_handler = PaddleASRConnectionHanddler(asr_engine) +# connection_handler.extract_feat(samples) + +# connection_handler.decode(is_finished=True) +# asr_results = connection_handler.rescoring() +# asr_results = connection_handler.get_result() +# word_time_stamp = connection_handler.get_word_time_stamp() + +# response = { +# "success": True, +# "code": 200, +# "message": { +# "description": "success" +# }, +# "result": { +# "transcription": asr_results, +# "times": word_time_stamp +# } +# } + + +# except ServerBaseException as e: +# response = failed_response(e.error_code, e.msg) +# except BaseException as e: +# response = failed_response(ErrorCode.SERVER_UNKOWN_ERR) +# print(e) + +# return response \ No newline at end of file