acs server, test=doc

3 years ago · d94ab22e92
parent a11dc53c1b
commit d94ab22e92
15 changed files with 406 additions and 11 deletions
--- a/demos/audio_content_search/acs_clinet.py
+++ b/demos/audio_content_search/acs_clinet.py
@ -0,0 +1,49 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import argparse
 from paddlespeech.cli.log import logger
 from paddlespeech.server.utils.audio_handler import ASRHttpHandler
 def main(args):
    """Send one audio file to the server over HTTP and log the response.

    Args:
        args: Parsed command-line arguments. The attributes read here are
            ``server_ip``, ``port``, ``endpoint`` and ``wavfile``.
    """
    logger.info("asr http client start")

    client = ASRHttpHandler(
        server_ip=args.server_ip,
        port=args.port,
        endpoint=args.endpoint)

    # The request parameters are fixed by this demo: format, sample rate
    # and language are passed positionally to ASRHttpHandler.run.
    request_params = ("wav", 16000, "zh")
    response = client.run(args.wavfile, *request_params)

    logger.info(f"the final result: {response}")
 if __name__ == "__main__":
    # Command-line entry point: collect the connection settings and the
    # audio path, then hand them to main().
    # NOTE(review): the default port (8090) differs from the port set in
    # conf/acs_application.yaml (8490); pass --port explicitly when the
    # server is started from that config. Confirm the intended default.
    cli = argparse.ArgumentParser(description="audio content search client")
    cli.add_argument(
        '--server_ip',
        type=str,
        default='127.0.0.1',
        help='server ip')
    cli.add_argument(
        '--port',
        type=int,
        default=8090,
        help='server port')
    cli.add_argument(
        "--wavfile",
        action="store",
        default="./16_audio.wav",
        help="wav file path ")
    cli.add_argument(
        '--endpoint',
        type=str,
        default='/paddlespeech/asr/search',
        help='server endpoint')
    main(cli.parse_args())
--- a/demos/audio_content_search/conf/acs_application.yaml
+++ b/demos/audio_content_search/conf/acs_application.yaml
@ -0,0 +1,36 @@
 # This is the parameter configuration file for PaddleSpeech Serving.
 #################################################################################
 #                             SERVER SETTING                                    #
 #################################################################################
 host: 0.0.0.0
 port: 8490
 # The task format in the engine_list is: <speech task>_<engine type>
 # task choices = ['acs_python']
 # protocol = ['http'] (only one can be selected).
 # http only supports the offline engine type.
 protocol: 'http'
 engine_list: ['acs_python']
 #################################################################################
 #                                ENGINE CONFIG                                  #
 #################################################################################
 ################################### ACS #########################################
 ################### speech task: acs; engine_type: python #######################
 # Settings for the audio content search (ACS) engine. The section name matches
 # the 'acs_python' entry in engine_list above.
 acs_python:
    task: acs
    # NOTE(review): the ACSEngine code added in this change always builds a
    # ws:// URL and does not read this key - confirm whether it is still needed.
    asr_protocol: 'websocket' # 'websocket'
    # NOTE(review): not read by the ACSEngine code added in this change;
    # presumably a padding around matched words - confirm.
    offset: 1.0 # second
    # Address of the streaming ASR server. ACSEngine connects to
    # ws://<asr_server_ip>:<asr_server_port>/paddlespeech/asr/streaming.
    asr_server_ip: 127.0.0.1
    asr_server_port: 8390
    lang: 'zh'
    # File with the words to search for, read line by line by
    # ACSEngine.read_search_words. The path is used as given, so a relative
    # path is resolved against the server's working directory.
    word_list: "words.txt"
    sample_rate: 16000
    # When this is empty, ACSEngine.init falls back to paddle.get_device().
    device: 'cpu' # set 'gpu:id' or 'cpu'
--- a/demos/audio_content_search/conf/ws_conformer_application.yaml
+++ b/demos/audio_content_search/conf/ws_conformer_application.yaml
@ -0,0 +1,45 @@
 # This is the parameter configuration file for PaddleSpeech Serving.
 #################################################################################
 #                             SERVER SETTING                                    #
 #################################################################################
 host: 0.0.0.0
 port: 8090
 # The task format in the engine_list is: <speech task>_<engine type>
 # task choices = ['asr_online']
 # protocol = ['websocket'] (only one can be selected).
 # websocket only supports the online engine type.
 protocol: 'websocket'
 engine_list: ['asr_online']
 #################################################################################
 #                                ENGINE CONFIG                                  #
 #################################################################################
 ################################### ASR #########################################
 ################### speech task: asr; engine_type: online #######################
 # Settings for the streaming ASR engine. The section name matches the
 # 'asr_online' entry in engine_list above.
 asr_online:
    model_type: 'conformer_online_multicn'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    # cfg_path and decode_method are left empty, i.e. they load as null.
    # NOTE(review): presumably the engine then uses the model's own defaults - confirm.
    cfg_path: 
    decode_method: 
    force_yes: True
    device: 'cpu' # cpu or gpu:id
    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config
    chunk_buffer_conf:
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms
        # Same value as the engine-level sample_rate above.
        sample_rate: 16000
        sample_width: 2
--- a/demos/audio_content_search/conf/ws_conformer_wenetspeech_application.yaml
+++ b/demos/audio_content_search/conf/ws_conformer_wenetspeech_application.yaml
@ -0,0 +1,46 @@
 # This is the parameter configuration file for PaddleSpeech Serving.
 #################################################################################
 #                             SERVER SETTING                                    #
 #################################################################################
 host: 0.0.0.0
 port: 8390
 # The task format in the engine_list is: <speech task>_<engine type>
 # task choices = ['asr_online']
 # protocol = ['websocket'] (only one can be selected).
 # websocket only supports the online engine type.
 protocol: 'websocket'
 engine_list: ['asr_online']
 #################################################################################
 #                                ENGINE CONFIG                                  #
 #################################################################################
 ################################### ASR #########################################
 ################### speech task: asr; engine_type: online #######################
 # Settings for the streaming ASR engine. The section name matches the
 # 'asr_online' entry in engine_list above.
 asr_online:
    model_type: 'conformer_online_wenetspeech'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path:
    # decode_method used to appear twice in this mapping (once empty, once
    # set). Mapping keys must be unique, so only the effective value is kept.
    decode_method: "attention_rescoring"
    force_yes: True
    device: 'cpu' # cpu or gpu:id
    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config
    chunk_buffer_conf:
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms
        sample_rate: 16000
        sample_width: 2
--- a/demos/audio_content_search/words.txt
+++ b/demos/audio_content_search/words.txt
@ -0,0 +1,2 @@
 我
 康
--- a/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml
+++ b/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml
@ -4,7 +4,7 @@
 #                             SERVER SETTING                                    #
 #################################################################################
 host: 0.0.0.0
-port: 8090
+port: 8390
 # The task format in the engin_list is: <speech task>_<engine type>
 # task choices = ['asr_online']
--- a/paddlespeech/server/bin/paddlespeech_server.py
+++ b/paddlespeech/server/bin/paddlespeech_server.py
@ -72,7 +72,7 @@ class ServerExecutor(BaseExecutor):
        else:
            raise Exception("unsupported protocol")
        app.include_router(api_router)
-
+        logger.info("start to init the engine")
        if not init_engine_pool(config):
            return False
--- a/paddlespeech/server/engine/acs/init.py
+++ b/paddlespeech/server/engine/acs/init.py
--- a/paddlespeech/server/engine/acs/python/init.py
+++ b/paddlespeech/server/engine/acs/python/init.py
--- a/paddlespeech/server/engine/acs/python/acs_engine.py
+++ b/paddlespeech/server/engine/acs/python/acs_engine.py
@ -0,0 +1,150 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import io
 import json
 import os
 import re
 import paddle
 import soundfile
 import websocket
 from paddlespeech.cli.log import logger
 from paddlespeech.server.engine.base_engine import BaseEngine
 class ACSEngine(BaseEngine):
    """Audio content search (ACS) engine.

    The engine sends an audio clip to a streaming ASR websocket server,
    then searches the returned transcript for the configured words and
    looks up the time span of every hit in the time stamps that came back
    with the transcript.
    """

    def __init__(self):
        """The ACSEngine Engine
        """
        super(ACSEngine, self).__init__()
        logger.info("Create the ACSEngine Instance")
        self.word_list = []
        # Filled in by init(). Starting at None lets get_asr_content()
        # report a missing server instead of raising AttributeError.
        self.url = None

    def init(self, config: dict):
        """Init the ACSEngine Engine

        Args:
            config (dict): The server configuration. It is accessed by
                attribute; the fields read are ``device``, ``word_list``,
                ``asr_server_ip`` and ``asr_server_port``.

        Returns:
            bool: True when the engine is ready, False when the device
                could not be set.
        """
        logger.info("Init the acs engine")
        # Bind before the try block so the error path can always report it.
        self.device = None
        try:
            self.config = config
            # An empty device setting falls back to paddle's current device.
            self.device = self.config.device or paddle.get_device()
            paddle.set_device(self.device)
            logger.info(f"ACS Engine set the device: {self.device}")
        except Exception as e:
            # Exception, not BaseException: KeyboardInterrupt and SystemExit
            # must still be able to stop the server.
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            logger.error("Initialize ACS server engine Failed on device: %s." %
                         (self.device))
            logger.error(e)
            return False

        self.read_search_words()

        self.url = "ws://" + self.config.asr_server_ip + ":" + str(
            self.config.asr_server_port) + "/paddlespeech/asr/streaming"

        logger.info("Init the acs engine successfully")
        return True

    def read_search_words(self):
        """Load the search words from the file named by ``config.word_list``.

        Every non-empty line becomes one entry of ``self.word_list``, with
        surrounding whitespace (including the line ending) removed. When the
        file is not configured or does not exist, an error is logged and the
        current word list is left untouched.
        """
        word_list = self.config.word_list
        if word_list is None:
            logger.error(
                "No word list file in config, please set the word list parameter"
            )
            return

        if not os.path.exists(word_list):
            logger.error("Please input correct word list file")
            return

        # Read as UTF-8 explicitly: the words are not ASCII and the platform
        # default encoding is not reliable. Lines are stripped because a
        # trailing newline would become part of the search pattern and
        # prevent any match.
        with open(word_list, 'r', encoding='utf-8') as fp:
            stripped = (line.strip() for line in fp)
            self.word_list = [word for word in stripped if word]

        logger.info(f"word list: {self.word_list}")

    @staticmethod
    def _signal_message(signal):
        """Build the JSON control message for the given signal name."""
        return json.dumps(
            {
                "name": "test.wav",
                "signal": signal,
                "nbest": 1
            },
            sort_keys=True,
            indent=4,
            separators=(',', ': '))

    def get_asr_content(self, audio_data):
        """Send the audio to the ASR server and return its final message.

        Args:
            audio_data: A file path or file-like object that
                ``soundfile.read`` can open.

        Returns:
            The decoded JSON reply to the "end" signal, or an empty string
            when no server url is configured.
        """
        logger.info("send a message to the server")
        if self.url is None:
            logger.error("No asr server, please input valid ip and port")
            return ""

        ws = websocket.WebSocket()
        ws.connect(self.url)
        # From here on the connection is open; close it on every exit path.
        try:
            # 1. announce the start of the audio
            ws.send(self._signal_message("start"))
            msg = ws.recv()
            logger.info("client receive msg={}".format(msg))

            # 2. send the whole audio in one binary frame
            samples, _ = soundfile.read(audio_data, dtype='int16')
            ws.send_binary(samples.tobytes())
            msg = json.loads(ws.recv())
            logger.info(f"audio result: {msg}")

            # 3. announce the end of the audio and read the final result
            logger.info("send the end signal")
            ws.send(self._signal_message("end"))
            msg = json.loads(ws.recv())
            logger.info(f"the final result: {msg}")
        finally:
            ws.close()

        return msg

    def get_macthed_word(self, msg):
        """Find every configured word in the transcript and log its time span.

        Each entry of ``self.word_list`` is used as a regular expression
        pattern. Match positions index directly into ``msg['times']``, so
        that list is expected to hold one entry per transcript character.

        Args:
            msg (dict): ASR reply with the transcript under ``'result'`` and
                the time stamps (items with ``'bg'`` and ``'ed'``) under
                ``'times'``.

        Returns:
            list: One dict per hit with the keys ``'w'`` (the word), ``'bg'``
                (begin time) and ``'ed'`` (end time).
        """
        asr_result = msg['result']
        time_stamp = msg['times']

        matched = []
        for w in self.word_list:
            for m in re.finditer(w, asr_result):
                # A zero-length match has no last character; without this
                # guard the end lookup would use index -1.
                if m.end(0) == m.start(0):
                    continue
                start = time_stamp[m.start(0)]['bg']
                end = time_stamp[m.end(0) - 1]['ed']
                logger.info(f'start: {start}, end: {end}')
                matched.append({'w': w, 'bg': start, 'ed': end})

        return matched

    def run(self, audio_data):
        """Run the audio content search on one audio clip.

        Args:
            audio_data (bytes): Raw bytes of an audio file; they are wrapped
                in ``io.BytesIO`` and decoded with ``soundfile``.

        Returns:
            list: The hits found by ``get_macthed_word``; empty when no ASR
                result is available.
        """
        logger.info("start to process the audio content search")
        msg = self.get_asr_content(io.BytesIO(audio_data))
        if not msg:
            # get_asr_content returns "" when no server url is configured.
            return []

        return self.get_macthed_word(msg)
--- a/paddlespeech/server/engine/engine_factory.py
+++ b/paddlespeech/server/engine/engine_factory.py
@ -52,5 +52,8 @@ class EngineFactory(object):
        elif engine_name.lower() == 'vector' and engine_type.lower() == 'python':
            from paddlespeech.server.engine.vector.python.vector_engine import VectorEngine
            return VectorEngine()
        elif engine_name.lower() == 'acs' and engine_type.lower() == 'python':
            from paddlespeech.server.engine.acs.python.acs_engine import ACSEngine
            return ACSEngine()
        else:
            return None
--- a/paddlespeech/server/engine/engine_pool.py
+++ b/paddlespeech/server/engine/engine_pool.py
@ -34,6 +34,7 @@ def init_engine_pool(config) -> bool:
        engine_type = engine_and_type.split("_")[1]
        ENGINE_POOL[engine] = EngineFactory.get_engine(
            engine_name=engine, engine_type=engine_type)
        if not ENGINE_POOL[engine].init(config=config[engine_and_type]):
            return False
--- a/paddlespeech/server/restful/api.py
+++ b/paddlespeech/server/restful/api.py
@ -22,6 +22,7 @@ from paddlespeech.server.restful.cls_api import router as cls_router
 from paddlespeech.server.restful.text_api import router as text_router
 from paddlespeech.server.restful.tts_api import router as tts_router
 from paddlespeech.server.restful.vector_api import router as vec_router
 from paddlespeech.server.restful.acs_api import router as acs_router
 _router = APIRouter()
@ -45,6 +46,8 @@ def setup_router(api_list: List):
            _router.include_router(text_router)
        elif api_name.lower() == 'vector':
            _router.include_router(vec_router)
        elif api_name.lower() == 'acs':
            _router.include_router(acs_router)
        else:
            logger.error(
                f"PaddleSpeech has not support such service: {api_name}")
--- a/paddlespeech/server/utils/audio_handler.py
+++ b/paddlespeech/server/utils/audio_handler.py
@ -96,7 +96,7 @@ class ASRWsAudioHandler:
        self.punc_server = TextHttpHandler(punc_server_ip, punc_server_port)
        logger.info(f"endpoint: {self.url}")
-    def read_wave(self, wavfile_path: str):
+    def read_wave(self, wavfile_path):
        """read the audio file from specific wavfile path
        Args:
@ -129,7 +129,7 @@ class ASRWsAudioHandler:
            x_chunk = padded_x[start:end]
            yield x_chunk
-    async def run(self, wavfile_path: str):
+    async def run(self, wavfile_path):
        """Send a audio file to online server
        Args:
@ -205,7 +205,7 @@ class ASRWsAudioHandler:
 class ASRHttpHandler:
-    def __init__(self, server_ip=None, port=None):
+    def __init__(self, server_ip=None, port=None, endpoint="/paddlespeech/asr"):
        """The ASR client http request
        Args:
@ -219,7 +219,7 @@ class ASRHttpHandler:
            self.url = None
        else:
            self.url = 'http://' + self.server_ip + ":" + str(
-                self.port) + '/paddlespeech/asr'
+                self.port) + endpoint
        logger.info(f"endpoint: {self.url}")
    def run(self, input, audio_format, sample_rate, lang):
--- a/paddlespeech/server/ws/asr_api.py
+++ b/paddlespeech/server/ws/asr_api.py
@ -12,15 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
-
+import base64
 from typing import Union
 from fastapi import APIRouter
 from fastapi import WebSocket
 import soundfile
 import io
 from fastapi import WebSocketDisconnect
 from starlette.websockets import WebSocketState as WebSocketState
 from paddlespeech.cli.log import logger
 from paddlespeech.server.engine.asr.online.asr_engine import PaddleASRConnectionHanddler
 from paddlespeech.server.engine.engine_pool import get_engine_pool
-
+from paddlespeech.server.restful.response import ASRResponse
 from paddlespeech.server.restful.response import ErrorResponse
 from paddlespeech.server.restful.request import ASRRequest
 from paddlespeech.server.utils.exception import ServerBaseException
 from paddlespeech.server.utils.errors import failed_response
 from paddlespeech.server.utils.errors import ErrorCode
 router = APIRouter()
@ -106,5 +115,56 @@ async def websocket_endpoint(websocket: WebSocket):
                # if the engine create the vad instance, this connection will have many period results 
                resp = {'result': asr_results}
                await websocket.send_json(resp)
-    except WebSocketDisconnect:
+    except WebSocketDisconnect as e:
-        pass
+        logger.error(e)
 # @router.post(
 #     "/paddlespeech/asr/search/", response_model=Union[ASRResponse, ErrorResponse])
 # def asr(request_body: ASRRequest):
 #     """asr api 
 #     Args:
 #         request_body (ASRRequest): [description]
 #     Returns:
 #         json: [description]
 #     """
 #     try:
 #         audio_data = base64.b64decode(request_body.audio)
 #         # get single engine from engine pool
 #         engine_pool = get_engine_pool()
 #         asr_engine = engine_pool['asr']
 #         samples, sample_rate = soundfile.read(io.BytesIO(audio_data), dtype='int16')
 #         # print(samples.shape)
 #         # print(sample_rate)
 #         connection_handler = PaddleASRConnectionHanddler(asr_engine)
 #         connection_handler.extract_feat(samples)
 #         connection_handler.decode(is_finished=True)
 #         asr_results = connection_handler.rescoring()
 #         asr_results = connection_handler.get_result()
 #         word_time_stamp = connection_handler.get_word_time_stamp()
 #         response = {
 #             "success": True,
 #             "code": 200,
 #             "message": {
 #                 "description": "success"
 #             },
 #             "result": {
 #                 "transcription": asr_results,
 #                 "times": word_time_stamp
 #             }
 #         }
 #     except ServerBaseException as e:
 #         response = failed_response(e.error_code, e.msg)
 #     except BaseException as e:
 #         response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
 #         print(e)
 #     return response