From d94ab22e925e195254d02f7706d6c536479cb9bd Mon Sep 17 00:00:00 2001
From: xiongxinlei <xiongxinlei@baidu.com>
Date: Sat, 14 May 2022 12:06:02 +0800
Subject: [PATCH] acs server, test=doc

---
 demos/audio_content_search/acs_clinet.py      |  49 ++++++
 .../conf/acs_application.yaml                 |  36 +++++
 .../conf/ws_conformer_application.yaml        |  45 ++++++
 .../ws_conformer_wenetspeech_application.yaml |  46 ++++++
 demos/audio_content_search/words.txt          |   2 +
 .../ws_conformer_wenetspeech_application.yaml |   2 +-
 .../server/bin/paddlespeech_server.py         |   2 +-
 paddlespeech/server/engine/acs/__init__.py    |   0
 .../server/engine/acs/python/__init__.py      |   0
 .../server/engine/acs/python/acs_engine.py    | 150 ++++++++++++++++++
 paddlespeech/server/engine/engine_factory.py  |   3 +
 paddlespeech/server/engine/engine_pool.py     |   1 +
 paddlespeech/server/restful/api.py            |   3 +
 paddlespeech/server/utils/audio_handler.py    |  10 +-
 paddlespeech/server/ws/asr_api.py             |  68 +++++++-
 15 files changed, 406 insertions(+), 11 deletions(-)
 create mode 100644 demos/audio_content_search/acs_clinet.py
 create mode 100644 demos/audio_content_search/conf/acs_application.yaml
 create mode 100644 demos/audio_content_search/conf/ws_conformer_application.yaml
 create mode 100644 demos/audio_content_search/conf/ws_conformer_wenetspeech_application.yaml
 create mode 100644 demos/audio_content_search/words.txt
 create mode 100644 paddlespeech/server/engine/acs/__init__.py
 create mode 100644 paddlespeech/server/engine/acs/python/__init__.py
 create mode 100644 paddlespeech/server/engine/acs/python/acs_engine.py

diff --git a/demos/audio_content_search/acs_clinet.py b/demos/audio_content_search/acs_clinet.py
new file mode 100644
index 00000000..11f99aca
--- /dev/null
+++ b/demos/audio_content_search/acs_clinet.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+
+from paddlespeech.cli.log import logger
+from paddlespeech.server.utils.audio_handler import ASRHttpHandler
+
+
+def main(args):
+    """Send args.wavfile to the server with ASRHttpHandler and log the reply."""
+    logger.info("asr http client start")
+    audio_format = "wav"
+    sample_rate = 16000
+    lang = "zh"
+    handler = ASRHttpHandler(
+        server_ip=args.server_ip, port=args.port, endpoint=args.endpoint)
+    res = handler.run(args.wavfile, audio_format, sample_rate, lang)
+    logger.info(f"the final result: {res}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="audio content search client")
+    parser.add_argument(
+        '--server_ip', type=str, default='127.0.0.1', help='server ip')
+    # the acs http server of this demo listens on 8490 (conf/acs_application.yaml)
+    parser.add_argument('--port', type=int, default=8490, help='server port')
+    parser.add_argument(
+        "--wavfile",
+        help="wav file path ",
+        default="./16_audio.wav")
+    parser.add_argument(
+        '--endpoint',
+        type=str,
+        default='/paddlespeech/asr/search',
+        help='server endpoint')
+    args = parser.parse_args()
+
+    main(args)
diff --git a/demos/audio_content_search/conf/acs_application.yaml b/demos/audio_content_search/conf/acs_application.yaml
new file mode 100644
index 00000000..010661e3
--- /dev/null
+++ b/demos/audio_content_search/conf/acs_application.yaml
@@ -0,0 +1,36 @@
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+#                             SERVER SETTING                                    #
+#################################################################################
+host: 0.0.0.0
+port: 8490
+
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['acs_python']
+# protocol = ['http'] (only one can be selected).
+# http only supports the offline engine type.
+protocol: 'http'
+engine_list: ['acs_python']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### ACS ##########################################
+################### acs task: engine_type: python #######################
+acs_python:
+    task: acs
+    asr_protocol: 'websocket' # the acs engine connects to the asr server over websocket
+    offset: 1.0 # second
+    asr_server_ip: 127.0.0.1 # ip of the streaming asr server
+    asr_server_port: 8390 # port of the streaming asr server (conf/ws_conformer_wenetspeech_application.yaml)
+    lang: 'zh'
+    word_list: "words.txt" # file with one search word per line, a relative path is resolved from the working directory
+    sample_rate: 16000
+    device: 'cpu' # set 'gpu:id' or 'cpu'
+
+
+
+
diff --git a/demos/audio_content_search/conf/ws_conformer_application.yaml b/demos/audio_content_search/conf/ws_conformer_application.yaml
new file mode 100644
index 00000000..2affde07
--- /dev/null
+++ b/demos/audio_content_search/conf/ws_conformer_application.yaml
@@ -0,0 +1,45 @@
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+#                             SERVER SETTING                                    #
+#################################################################################
+host: 0.0.0.0
+port: 8090
+
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['asr_online']
+# protocol = ['websocket'] (only one can be selected).
+# websocket only supports the online engine type.
+protocol: 'websocket'
+engine_list: ['asr_online']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### ASR #########################################
+################### speech task: asr; engine_type: online #######################
+asr_online:
+    model_type: 'conformer_online_multicn'
+    am_model: # the pdmodel file of am static model [optional]
+    am_params:  # the pdiparams file of am static model [optional]
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path: 
+    decode_method: 
+    force_yes: True
+    device: 'cpu' # cpu or gpu:id
+    am_predictor_conf:
+        device:  # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False  # True -> print glog
+        summary: True  # False -> do not show predictor config
+
+    chunk_buffer_conf:
+        window_n: 7     # frame
+        shift_n: 4      # frame
+        window_ms: 25   # ms
+        shift_ms: 10    # ms
+        sample_rate: 16000
+        sample_width: 2
diff --git a/demos/audio_content_search/conf/ws_conformer_wenetspeech_application.yaml b/demos/audio_content_search/conf/ws_conformer_wenetspeech_application.yaml
new file mode 100644
index 00000000..c23680bd
--- /dev/null
+++ b/demos/audio_content_search/conf/ws_conformer_wenetspeech_application.yaml
@@ -0,0 +1,46 @@
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+#                             SERVER SETTING                                    #
+#################################################################################
+host: 0.0.0.0
+port: 8390
+
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['asr_online']
+# protocol = ['websocket'] (only one can be selected).
+# websocket only supports the online engine type.
+protocol: 'websocket'
+engine_list: ['asr_online']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### ASR #########################################
+################### speech task: asr; engine_type: online #######################
+asr_online:
+    model_type: 'conformer_online_wenetspeech'
+    am_model: # the pdmodel file of am static model [optional]
+    am_params:  # the pdiparams file of am static model [optional]
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path:
+    decode_method: "attention_rescoring"
+    force_yes: True
+    device: 'cpu' # cpu or gpu:id
+
+    am_predictor_conf:
+        device:  # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False  # True -> print glog
+        summary: True  # False -> do not show predictor config
+
+    chunk_buffer_conf:
+        window_n: 7     # frame
+        shift_n: 4      # frame
+        window_ms: 25   # ms
+        shift_ms: 10    # ms
+        sample_rate: 16000
+        sample_width: 2
diff --git a/demos/audio_content_search/words.txt b/demos/audio_content_search/words.txt
new file mode 100644
index 00000000..25510eb4
--- /dev/null
+++ b/demos/audio_content_search/words.txt
@@ -0,0 +1,2 @@
+我
+康
\ No newline at end of file
diff --git a/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml b/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml
index e9a89c19..c23680bd 100644
--- a/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml
+++ b/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml
@@ -4,7 +4,7 @@
 #                             SERVER SETTING                                    #
 #################################################################################
 host: 0.0.0.0
-port: 8090
+port: 8390
 
 # The task format in the engin_list is: <speech task>_<engine type>
 # task choices = ['asr_online']
diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py
index db92f179..09047e81 100644
--- a/paddlespeech/server/bin/paddlespeech_server.py
+++ b/paddlespeech/server/bin/paddlespeech_server.py
@@ -72,7 +72,7 @@ class ServerExecutor(BaseExecutor):
         else:
             raise Exception("unsupported protocol")
         app.include_router(api_router)
-
+        logger.info("start to init the engine")
         if not init_engine_pool(config):
             return False
 
diff --git a/paddlespeech/server/engine/acs/__init__.py b/paddlespeech/server/engine/acs/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/paddlespeech/server/engine/acs/python/__init__.py b/paddlespeech/server/engine/acs/python/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/paddlespeech/server/engine/acs/python/acs_engine.py b/paddlespeech/server/engine/acs/python/acs_engine.py
new file mode 100644
index 00000000..42cdbb0a
--- /dev/null
+++ b/paddlespeech/server/engine/acs/python/acs_engine.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import io
+import json
+import os
+import re
+
+import paddle
+import soundfile
+import websocket
+
+from paddlespeech.cli.log import logger
+from paddlespeech.server.engine.base_engine import BaseEngine
+
+
+class ACSEngine(BaseEngine):
+    """Audio content search (ACS) engine.
+
+    The engine sends the audio to a streaming ASR server over websocket and
+    searches the configured words in the returned transcription.
+    """
+
+    def __init__(self):
+        """Create the engine with an empty word list and no ASR server url."""
+        super(ACSEngine, self).__init__()
+        logger.info("Create the ACSEngine Instance")
+        # set in init(); defined here so that they can always be read
+        self.device = None
+        self.url = None
+        self.word_list = []
+
+    def init(self, config: dict):
+        """Init the ACSEngine Engine
+
+        Args:
+            config (dict): The server configuration of the acs engine
+
+        Returns:
+            bool: True if the engine is ready, False if the device can not be set
+        """
+        logger.info("Init the acs engine")
+        self.config = config
+        try:
+            self.device = self.config.device or paddle.get_device()
+            paddle.set_device(self.device)
+            logger.info(f"ACS Engine set the device: {self.device}")
+        except Exception as e:
+            logger.error(
+                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
+            )
+            logger.error("Initialize ACS server engine Failed on device: %s." %
+                         (self.device))
+            logger.error(e)
+            return False
+
+        self.read_search_words()
+
+        self.url = "ws://" + self.config.asr_server_ip + ":" + str(
+            self.config.asr_server_port) + "/paddlespeech/asr/streaming"
+
+        logger.info("Init the acs engine successfully")
+        return True
+
+    def read_search_words(self):
+        """Load the search words from the word_list file of the config.
+
+        The file holds one word per line. Surrounding whitespace and blank
+        lines are dropped, so the line terminator is never part of a word.
+        If the file is not configured or does not exist, the error is logged
+        and the current word list is kept.
+        """
+        word_list = self.config.word_list
+        if word_list is None:
+            logger.error(
+                "No word list file in config, please set the word list parameter"
+            )
+            return
+
+        if not os.path.exists(word_list):
+            logger.error("Please input correct word list file")
+            return
+
+        # the word list may hold non-ascii words, do not depend on the locale
+        with open(word_list, 'r', encoding='utf-8') as fp:
+            self.word_list = [line.strip() for line in fp if line.strip()]
+
+        logger.info(f"word list: {self.word_list}")
+
+    @staticmethod
+    def _signal_message(signal):
+        """Build the json control message ("start" or "end") of the ASR server."""
+        return json.dumps(
+            {
+                "name": "test.wav",
+                "signal": signal,
+                "nbest": 1
+            },
+            sort_keys=True,
+            indent=4,
+            separators=(',', ': '))
+
+    def get_asr_content(self, audio_data):
+        """Send the whole audio to the streaming ASR server.
+
+        Args:
+            audio_data: The audio file (path or file object) read by soundfile
+
+        Returns:
+            dict: The message the server answers to the end signal,
+                or "" if there is no ASR server url
+        """
+        logger.info("send a message to the server")
+        if self.url is None:
+            logger.error("No asr server, please input valid ip and port")
+            return ""
+
+        ws = websocket.WebSocket()
+        ws.connect(self.url)
+        try:
+            # 1. send the start signal
+            ws.send(self._signal_message("start"))
+            msg = ws.recv()
+            logger.info("client receive msg={}".format(msg))
+
+            # 2. send the total audio data
+            samples, _ = soundfile.read(audio_data, dtype='int16')
+            ws.send_binary(samples.tobytes())
+            msg = json.loads(ws.recv())
+            logger.info(f"audio result: {msg}")
+
+            # 3. send the end signal, the answer is the final result
+            logger.info("send the end signal")
+            ws.send(self._signal_message("end"))
+            msg = json.loads(ws.recv())
+            logger.info(f"the final result: {msg}")
+        finally:
+            # close the connection even if the request fails
+            ws.close()
+
+        return msg
+
+    def get_macthed_word(self, msg):
+        """Search the words of the word list in the ASR result.
+
+        Args:
+            msg (dict): The final ASR message. 'result' is the transcription,
+                'times' holds the 'bg' and 'ed' time of every character
+
+        Returns:
+            list: A dict with the keys 'w', 'bg' and 'ed' for every match
+        """
+        if not isinstance(msg, dict):
+            # get_asr_content returns "" if there is no ASR server
+            return []
+
+        asr_result = msg.get('result') or ""
+        time_stamp = msg.get('times') or []
+
+        acs_result = []
+        for w in filter(None, self.word_list):
+            # the words are plain text, not regular expressions
+            for m in re.finditer(re.escape(w), asr_result):
+                if m.end(0) > len(time_stamp):
+                    logger.error(f"no time stamp for the matched word: {w}")
+                    continue
+                start = time_stamp[m.start(0)]['bg']
+                end = time_stamp[m.end(0) - 1]['ed']
+                logger.info(f'start: {start}, end: {end}')
+                acs_result.append({'w': w, 'bg': start, 'ed': end})
+
+        return acs_result
+
+    def run(self, audio_data):
+        """Search the words of the word list in the audio.
+
+        Args:
+            audio_data (bytes): The content of the audio file
+
+        Returns:
+            list: The matched words and their time stamps
+        """
+        logger.info("start to process the audio content search")
+        msg = self.get_asr_content(io.BytesIO(audio_data))
+
+        return self.get_macthed_word(msg)
diff --git a/paddlespeech/server/engine/engine_factory.py b/paddlespeech/server/engine/engine_factory.py
index 6cf95d75..5fdaacce 100644
--- a/paddlespeech/server/engine/engine_factory.py
+++ b/paddlespeech/server/engine/engine_factory.py
@@ -52,5 +52,8 @@ class EngineFactory(object):
         elif engine_name.lower() == 'vector' and engine_type.lower() == 'python':
             from paddlespeech.server.engine.vector.python.vector_engine import VectorEngine
             return VectorEngine()
+        elif engine_name.lower() == 'acs' and engine_type.lower() == 'python':
+            from paddlespeech.server.engine.acs.python.acs_engine import ACSEngine
+            return ACSEngine()
         else:
             return None
diff --git a/paddlespeech/server/engine/engine_pool.py b/paddlespeech/server/engine/engine_pool.py
index 9de73567..5300303f 100644
--- a/paddlespeech/server/engine/engine_pool.py
+++ b/paddlespeech/server/engine/engine_pool.py
@@ -34,6 +34,7 @@ def init_engine_pool(config) -> bool:
         engine_type = engine_and_type.split("_")[1]
         ENGINE_POOL[engine] = EngineFactory.get_engine(
             engine_name=engine, engine_type=engine_type)
+
         if not ENGINE_POOL[engine].init(config=config[engine_and_type]):
             return False
 
diff --git a/paddlespeech/server/restful/api.py b/paddlespeech/server/restful/api.py
index 63f865e8..1c2dd281 100644
--- a/paddlespeech/server/restful/api.py
+++ b/paddlespeech/server/restful/api.py
@@ -22,6 +22,7 @@ from paddlespeech.server.restful.cls_api import router as cls_router
 from paddlespeech.server.restful.text_api import router as text_router
 from paddlespeech.server.restful.tts_api import router as tts_router
 from paddlespeech.server.restful.vector_api import router as vec_router
+from paddlespeech.server.restful.acs_api import router as acs_router
 _router = APIRouter()
 
 
@@ -45,6 +46,8 @@ def setup_router(api_list: List):
             _router.include_router(text_router)
         elif api_name.lower() == 'vector':
             _router.include_router(vec_router)
+        elif api_name.lower() == 'acs':
+            _router.include_router(acs_router)
         else:
             logger.error(
                 f"PaddleSpeech has not support such service: {api_name}")
diff --git a/paddlespeech/server/utils/audio_handler.py b/paddlespeech/server/utils/audio_handler.py
index b85cf485..0fcdd08a 100644
--- a/paddlespeech/server/utils/audio_handler.py
+++ b/paddlespeech/server/utils/audio_handler.py
@@ -96,7 +96,7 @@ class ASRWsAudioHandler:
         self.punc_server = TextHttpHandler(punc_server_ip, punc_server_port)
         logger.info(f"endpoint: {self.url}")
 
-    def read_wave(self, wavfile_path: str):
+    def read_wave(self, wavfile_path):
         """read the audio file from specific wavfile path
 
         Args:
@@ -129,7 +129,7 @@ class ASRWsAudioHandler:
             x_chunk = padded_x[start:end]
             yield x_chunk
 
-    async def run(self, wavfile_path: str):
+    async def run(self, wavfile_path):
         """Send a audio file to online server
 
         Args:
@@ -205,7 +205,7 @@ class ASRWsAudioHandler:
 
 
 class ASRHttpHandler:
-    def __init__(self, server_ip=None, port=None):
+    def __init__(self, server_ip=None, port=None, endpoint="/paddlespeech/asr"):
         """The ASR client http request
 
         Args:
@@ -219,7 +219,7 @@ class ASRHttpHandler:
             self.url = None
         else:
             self.url = 'http://' + self.server_ip + ":" + str(
-                self.port) + '/paddlespeech/asr'
+                self.port) + endpoint
         logger.info(f"endpoint: {self.url}")
 
     def run(self, input, audio_format, sample_rate, lang):
@@ -248,7 +248,7 @@ class ASRHttpHandler:
         }
 
         res = requests.post(url=self.url, data=json.dumps(data))
-
+
         return res.json()
 
 
diff --git a/paddlespeech/server/ws/asr_api.py b/paddlespeech/server/ws/asr_api.py
index 0f7dcddd..bf6e912c 100644
--- a/paddlespeech/server/ws/asr_api.py
+++ b/paddlespeech/server/ws/asr_api.py
@@ -12,15 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
-
+import base64
+from typing import Union
 from fastapi import APIRouter
 from fastapi import WebSocket
+import soundfile
+import io
 from fastapi import WebSocketDisconnect
 from starlette.websockets import WebSocketState as WebSocketState
 
+from paddlespeech.cli.log import logger
 from paddlespeech.server.engine.asr.online.asr_engine import PaddleASRConnectionHanddler
 from paddlespeech.server.engine.engine_pool import get_engine_pool
-
+from paddlespeech.server.restful.response import ASRResponse
+from paddlespeech.server.restful.response import ErrorResponse
+from paddlespeech.server.restful.request import ASRRequest
+from paddlespeech.server.utils.exception import ServerBaseException
+from paddlespeech.server.utils.errors import failed_response
+from paddlespeech.server.utils.errors import ErrorCode
 router = APIRouter()
 
 
@@ -106,5 +115,56 @@ async def websocket_endpoint(websocket: WebSocket):
                 # if the engine create the vad instance, this connection will have many period results 
                 resp = {'result': asr_results}
                 await websocket.send_json(resp)
-    except WebSocketDisconnect:
-        pass
+    except WebSocketDisconnect as e:
+        logger.error(e)
+
+
+# @router.post(
+#     "/paddlespeech/asr/search/", response_model=Union[ASRResponse, ErrorResponse])
+# def asr(request_body: ASRRequest):
+#     """asr api 
+
+#     Args:
+#         request_body (ASRRequest): [description]
+
+#     Returns:
+#         json: [description]
+#     """
+#     try:
+#         audio_data = base64.b64decode(request_body.audio)
+
+#         # get single engine from engine pool
+#         engine_pool = get_engine_pool()
+#         asr_engine = engine_pool['asr']
+
+#         samples, sample_rate = soundfile.read(io.BytesIO(audio_data), dtype='int16')
+#         # print(samples.shape)
+#         # print(sample_rate)
+#         connection_handler = PaddleASRConnectionHanddler(asr_engine)
+#         connection_handler.extract_feat(samples)
+        
+#         connection_handler.decode(is_finished=True)
+#         asr_results = connection_handler.rescoring()
+#         asr_results = connection_handler.get_result()
+#         word_time_stamp = connection_handler.get_word_time_stamp()
+
+#         response = {
+#             "success": True,
+#             "code": 200,
+#             "message": {
+#                 "description": "success"
+#             },
+#             "result": {
+#                 "transcription": asr_results,
+#                 "times": word_time_stamp
+#             }
+#         }
+
+        
+#     except ServerBaseException as e:
+#         response = failed_response(e.error_code, e.msg)
+#     except BaseException as e:
+#         response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
+#         print(e)
+
+#     return response
\ No newline at end of file