From 833900a8b4c0b2670ef01408751930359b94424f Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Mon, 25 Apr 2022 15:50:23 +0800 Subject: [PATCH 1/3] asr client add punctuatjion server, test=doc --- .../server/bin/paddlespeech_client.py | 154 ++++++++---------- paddlespeech/server/utils/audio_handler.py | 64 +++++++- 2 files changed, 130 insertions(+), 88 deletions(-) diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 1cc0a6ab..8cc384a1 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -16,7 +16,6 @@ import asyncio import base64 import io import json -import logging import os import random import time @@ -36,7 +35,7 @@ from paddlespeech.server.utils.util import wav2base64 __all__ = [ 'TTSClientExecutor', 'TTSOnlineClientExecutor', 'ASRClientExecutor', - 'ASROnlineClientExecutor', 'CLSClientExecutor' + 'CLSClientExecutor' ] @@ -288,6 +287,12 @@ class ASRClientExecutor(BaseExecutor): default=None, help='Audio file to be recognized', required=True) + self.parser.add_argument( + '--protocol', + type=str, + default="http", + choices=["http", "websocket"], + help='server protocol') self.parser.add_argument( '--sample_rate', type=int, default=16000, help='audio sample rate') self.parser.add_argument( @@ -295,81 +300,18 @@ class ASRClientExecutor(BaseExecutor): self.parser.add_argument( '--audio_format', type=str, default="wav", help='audio format') - def execute(self, argv: List[str]) -> bool: - args = self.parser.parse_args(argv) - input_ = args.input - server_ip = args.server_ip - port = args.port - sample_rate = args.sample_rate - lang = args.lang - audio_format = args.audio_format - - try: - time_start = time.time() - res = self( - input=input_, - server_ip=server_ip, - port=port, - sample_rate=sample_rate, - lang=lang, - audio_format=audio_format) - time_end = time.time() - logger.info(res.json()) - logger.info("Response time %f s." % (time_end - time_start)) - return True - except Exception as e: - logger.error("Failed to speech recognition.") - return False - - @stats_wrapper - def __call__(self, - input: str, - server_ip: str="127.0.0.1", - port: int=8090, - sample_rate: int=16000, - lang: str="zh_cn", - audio_format: str="wav"): - """ - Python API to call an executor. - """ - - url = 'http://' + server_ip + ":" + str(port) + '/paddlespeech/asr' - audio = wav2base64(input) - data = { - "audio": audio, - "audio_format": audio_format, - "sample_rate": sample_rate, - "lang": lang, - } - - res = requests.post(url=url, data=json.dumps(data)) - return res - - -@cli_client_register( - name='paddlespeech_client.asr_online', - description='visit asr online service') -class ASROnlineClientExecutor(BaseExecutor): - def __init__(self): - super(ASROnlineClientExecutor, self).__init__() - self.parser = argparse.ArgumentParser( - prog='paddlespeech_client.asr_online', add_help=True) - self.parser.add_argument( - '--server_ip', type=str, default='127.0.0.1', help='server ip') self.parser.add_argument( - '--port', type=int, default=8091, help='server port') - self.parser.add_argument( - '--input', + '--punc.server_ip', type=str, default=None, - help='Audio file to be recognized', - required=True) - self.parser.add_argument( - '--sample_rate', type=int, default=16000, help='audio sample rate') - self.parser.add_argument( - '--lang', type=str, default="zh_cn", help='language') + dest="punc_server_ip", + help='Punctuation server ip') self.parser.add_argument( - '--audio_format', type=str, default="wav", help='audio format') + '--punc.port', + type=int, + default=8091, + dest="punc_server_port", + help='Punctuation server port') def execute(self, argv: List[str]) -> bool: args = self.parser.parse_args(argv) @@ -379,6 +321,7 @@ class ASROnlineClientExecutor(BaseExecutor): sample_rate = args.sample_rate lang = args.lang audio_format = args.audio_format + protocol = args.protocol try: time_start = time.time() @@ -388,9 +331,12 @@ class ASROnlineClientExecutor(BaseExecutor): port=port, sample_rate=sample_rate, lang=lang, - audio_format=audio_format) + audio_format=audio_format, + protocol=protocol, + punc_server_ip=args.punc_server_ip, + punc_server_port=args.punc_server_port) time_end = time.time() - logger.info(res) + logger.info(f"ASR result: {res}") logger.info("Response time %f s." % (time_end - time_start)) return True except Exception as e: @@ -402,21 +348,55 @@ class ASROnlineClientExecutor(BaseExecutor): def __call__(self, input: str, server_ip: str="127.0.0.1", - port: int=8091, + port: int=8090, sample_rate: int=16000, lang: str="zh_cn", - audio_format: str="wav"): - """ - Python API to call an executor. + audio_format: str="wav", + protocol: str="http", + punc_server_ip: str="127.0.0.1", + punc_server_port: int=8091): + """Python API to call an executor. + + Args: + input (str): The input audio file path + server_ip (str, optional): The ASR server ip. Defaults to "127.0.0.1". + port (int, optional): The ASR server port. Defaults to 8090. + sample_rate (int, optional): The audio sample rate. Defaults to 16000. + lang (str, optional): The audio language type. Defaults to "zh_cn". + audio_format (str, optional): The audio format information. Defaults to "wav". + protocol (str, optional): The ASR server. Defaults to "http". + + Returns: + str: The ASR results """ - logging.basicConfig(level=logging.INFO) - logging.info("asr websocket client start") - handler = ASRAudioHandler(server_ip, port) - loop = asyncio.get_event_loop() - res = loop.run_until_complete(handler.run(input)) - logging.info("asr websocket client finished") - - return res['asr_results'] + # 1. Firstly, we use the asr server to recognize the audio text content + if protocol.lower() == "http": + from paddlespeech.server.utils.audio_handler import ASRHttpHandler + logger.info("asr http client start") + handler = ASRHttpHandler(server_ip=server_ip, port=port) + res = handler.run(input, audio_format, sample_rate, lang) + res = res['result']['transcription'] + logger.info("asr http client finished") + + elif protocol.lower() == "websocket": + logger.info("asr websocket client start") + handler = ASRAudioHandler( + server_ip, + port, + punc_server_ip=punc_server_ip, + punc_server_port=punc_server_port) + loop = asyncio.get_event_loop() + res = loop.run_until_complete(handler.run(input)) + res = res['asr_results'] + logger.info("asr websocket client finished") + else: + logger.error(f"Sorry, we have not support protocol: {protocol}," + "please use http or websocket protocol") + sys.exit(-1) + + # 2. Secondly, we use the punctuation server to do post process for text + + return res @cli_client_register( diff --git a/paddlespeech/server/utils/audio_handler.py b/paddlespeech/server/utils/audio_handler.py index c2863115..28f963f7 100644 --- a/paddlespeech/server/utils/audio_handler.py +++ b/paddlespeech/server/utils/audio_handler.py @@ -24,20 +24,57 @@ import websockets from paddlespeech.cli.log import logger from paddlespeech.server.utils.audio_process import save_audio +from paddlespeech.server.utils.util import wav2base64 + + +class TextHttpHandler: + def __init__(self, server_ip="127.0.0.1", port=8090): + super().__init__() + self.server_ip = server_ip + self.port = port + self.url = 'http://' + self.server_ip + ":" + str( + self.port) + '/paddlespeech/text' + + def run(self, text): + if self.server_ip is None or self.port is None: + logger.warning( + "No punctuation server, please input valid ip and port") + return text + request = { + "text": text, + } + try: + res = requests.post(url=self.url, data=json.dumps(request)) + response_dict = res.json() + punc_text = response_dict["result"]["punc_text"] + except Exception as e: + logger.error(f"Call punctuation {self.url} occurs") + logger.error(e) + punc_text = text + + return punc_text class ASRAudioHandler: - def __init__(self, url="127.0.0.1", port=8090): + def __init__(self, + url="127.0.0.1", + port=8090, + punc_server_ip="127.0.0.1", + punc_server_port="8091"): """PaddleSpeech Online ASR Server Client audio handler Online asr server use the websocket protocal Args: url (str, optional): the server ip. Defaults to "127.0.0.1". port (int, optional): the server port. Defaults to 8090. + punc_server_ip(str, optional): the punctuation server ip. Defaults to None. + punc_server_port(int, optional): the punctuation port. Defaults to None """ self.url = url self.port = port self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr" + self.punc_server = TextHttpHandler(punc_server_ip, punc_server_port) + def read_wave(self, wavfile_path: str): """read the audio file from specific wavfile path @@ -102,6 +139,7 @@ class ASRAudioHandler: await ws.send(chunk_data.tobytes()) msg = await ws.recv() msg = json.loads(msg) + msg["asr_results"] = self.punc_server.run(msg["asr_results"]) logger.info("receive msg={}".format(msg)) # 4. we must send finished signal to the server @@ -119,11 +157,35 @@ class ASRAudioHandler: # 5. decode the bytes to str msg = json.loads(msg) + msg["asr_results"] = self.punc_server.run(msg["asr_results"]) logger.info("final receive msg={}".format(msg)) result = msg + return result +class ASRHttpHandler: + def __init__(self, server_ip="127.0.0.1", port=8090): + super().__init__() + self.server_ip = server_ip + self.port = port + self.url = 'http://' + self.server_ip + ":" + str( + self.port) + '/paddlespeech/asr' + + def run(self, input, audio_format, sample_rate, lang): + audio = wav2base64(input) + data = { + "audio": audio, + "audio_format": audio_format, + "sample_rate": sample_rate, + "lang": lang, + } + + res = requests.post(url=self.url, data=json.dumps(data)) + + return res.json() + + class TTSWsHandler: def __init__(self, server="127.0.0.1", port=8092, play: bool=False): """PaddleSpeech Online TTS Server Client audio handler From 7007b0ecac4844e38af0f0346b1421f2d8d68527 Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Mon, 25 Apr 2022 16:50:41 +0800 Subject: [PATCH 2/3] update the asr server api, test=doc --- .../streaming_asr_server/websocket_client.py | 28 +++++- paddlespeech/cli/cls/infer.py | 4 +- .../server/bin/paddlespeech_client.py | 10 +-- .../tests/asr/online/microphone_client.py | 4 +- paddlespeech/server/utils/audio_handler.py | 89 +++++++++++++++---- paddlespeech/server/ws/asr_socket.py | 6 +- 6 files changed, 107 insertions(+), 34 deletions(-) diff --git a/demos/streaming_asr_server/websocket_client.py b/demos/streaming_asr_server/websocket_client.py index 2a15096c..5c632b79 100644 --- a/demos/streaming_asr_server/websocket_client.py +++ b/demos/streaming_asr_server/websocket_client.py @@ -20,19 +20,23 @@ import logging import os from paddlespeech.cli.log import logger -from paddlespeech.server.utils.audio_handler import ASRAudioHandler +from paddlespeech.server.utils.audio_handler import ASRWsAudioHandler def main(args): logger.info("asr websocket client start") - handler = ASRAudioHandler("127.0.0.1", 8090) + handler = ASRWsAudioHandler( + args.server_ip, + args.port, + punc_server_ip=args.punc_server_ip, + punc_server_port=args.punc_server_port) loop = asyncio.get_event_loop() # support to process single audio file if args.wavfile and os.path.exists(args.wavfile): logger.info(f"start to process the wavscp: {args.wavfile}") result = loop.run_until_complete(handler.run(args.wavfile)) - result = result["asr_results"] + result = result["final_result"] logger.info(f"asr websocket client finished : {result}") # support to process batch audios from wav.scp @@ -43,13 +47,29 @@ def main(args): for line in f: utt_name, utt_path = line.strip().split() result = loop.run_until_complete(handler.run(utt_path)) - result = result["asr_results"] + result = result["final_result"] w.write(f"{utt_name} {result}\n") if __name__ == "__main__": logger.info("Start to do streaming asr client") parser = argparse.ArgumentParser() + parser.add_argument( + '--server_ip', type=str, default='127.0.0.1', help='server ip') + parser.add_argument('--port', type=int, default=8090, help='server port') + parser.add_argument( + '--punc.server_ip', + type=str, + default=None, + dest="punc_server_ip", + help='Punctuation server ip') + parser.add_argument( + '--punc.port', + type=int, + default=8091, + dest="punc_server_port", + help='Punctuation server port') + parser.add_argument( "--wavfile", action="store", diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index 8b90f124..1f637a8f 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -21,8 +21,6 @@ from typing import Union import numpy as np import paddle import yaml -from paddleaudio import load -from paddleaudio.features import LogMelSpectrogram from ..executor import BaseExecutor from ..log import logger @@ -30,6 +28,8 @@ from ..utils import cli_register from ..utils import stats_wrapper from .pretrained_models import model_alias from .pretrained_models import pretrained_models +from paddleaudio import load +from paddleaudio.features import LogMelSpectrogram from paddlespeech.s2t.utils.dynamic_import import dynamic_import __all__ = ['CLSExecutor'] diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 8cc384a1..14847119 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -29,7 +29,7 @@ from ..executor import BaseExecutor from ..util import cli_client_register from ..util import stats_wrapper from paddlespeech.cli.log import logger -from paddlespeech.server.utils.audio_handler import ASRAudioHandler +from paddlespeech.server.utils.audio_handler import ASRWsAudioHandler from paddlespeech.server.utils.audio_process import wav2pcm from paddlespeech.server.utils.util import wav2base64 @@ -369,7 +369,7 @@ class ASRClientExecutor(BaseExecutor): Returns: str: The ASR results """ - # 1. Firstly, we use the asr server to recognize the audio text content + # we use the asr server to recognize the audio text content if protocol.lower() == "http": from paddlespeech.server.utils.audio_handler import ASRHttpHandler logger.info("asr http client start") @@ -380,22 +380,20 @@ class ASRClientExecutor(BaseExecutor): elif protocol.lower() == "websocket": logger.info("asr websocket client start") - handler = ASRAudioHandler( + handler = ASRWsAudioHandler( server_ip, port, punc_server_ip=punc_server_ip, punc_server_port=punc_server_port) loop = asyncio.get_event_loop() res = loop.run_until_complete(handler.run(input)) - res = res['asr_results'] + res = res['final_result'] logger.info("asr websocket client finished") else: logger.error(f"Sorry, we have not support protocol: {protocol}," "please use http or websocket protocol") sys.exit(-1) - # 2. Secondly, we use the punctuation server to do post process for text - return res diff --git a/paddlespeech/server/tests/asr/online/microphone_client.py b/paddlespeech/server/tests/asr/online/microphone_client.py index 2ceaf6d0..bb27e548 100644 --- a/paddlespeech/server/tests/asr/online/microphone_client.py +++ b/paddlespeech/server/tests/asr/online/microphone_client.py @@ -26,7 +26,7 @@ import pyaudio import websockets -class ASRAudioHandler(threading.Thread): +class ASRWsAudioHandler(threading.Thread): def __init__(self, url="127.0.0.1", port=8091): threading.Thread.__init__(self) self.url = url @@ -148,7 +148,7 @@ if __name__ == "__main__": logging.basicConfig(level=logging.INFO) logging.info("asr websocket client start") - handler = ASRAudioHandler("127.0.0.1", 8091) + handler = ASRWsAudioHandler("127.0.0.1", 8091) loop = asyncio.get_event_loop() main_task = asyncio.ensure_future(handler.run()) for signal in [SIGINT, SIGTERM]: diff --git a/paddlespeech/server/utils/audio_handler.py b/paddlespeech/server/utils/audio_handler.py index 28f963f7..7df4a8e3 100644 --- a/paddlespeech/server/utils/audio_handler.py +++ b/paddlespeech/server/utils/audio_handler.py @@ -29,13 +29,30 @@ from paddlespeech.server.utils.util import wav2base64 class TextHttpHandler: def __init__(self, server_ip="127.0.0.1", port=8090): + """Text http client request + + Args: + server_ip (str, optional): the text server ip. Defaults to "127.0.0.1". + port (int, optional): the text server port. Defaults to 8090. + """ super().__init__() self.server_ip = server_ip self.port = port - self.url = 'http://' + self.server_ip + ":" + str( - self.port) + '/paddlespeech/text' + if server_ip is None or port is None: + self.url = None + else: + self.url = 'http://' + self.server_ip + ":" + str( + self.port) + '/paddlespeech/text' def run(self, text): + """Call the text server to process the specific text + + Args: + text (str): the text to be processed + + Returns: + str: punctuation text + """ if self.server_ip is None or self.port is None: logger.warning( "No punctuation server, please input valid ip and port") @@ -55,24 +72,29 @@ class TextHttpHandler: return punc_text -class ASRAudioHandler: +class ASRWsAudioHandler: def __init__(self, - url="127.0.0.1", - port=8090, - punc_server_ip="127.0.0.1", - punc_server_port="8091"): + url=None, + port=None, + endpoint="/paddlespeech/asr/streaming", + punc_server_ip=None, + punc_server_port=None): """PaddleSpeech Online ASR Server Client audio handler Online asr server use the websocket protocal Args: - url (str, optional): the server ip. Defaults to "127.0.0.1". - port (int, optional): the server port. Defaults to 8090. + url (str, optional): the server ip. Defaults to None. + port (int, optional): the server port. Defaults to None. + endpoint(str, optional): to compatiable with python server and c++ server. punc_server_ip(str, optional): the punctuation server ip. Defaults to None. punc_server_port(int, optional): the punctuation port. Defaults to None """ self.url = url self.port = port - self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr" - + if url is None or port is None or endpoint is None: + self.url = None + else: + self.url = "ws://" + self.url + ":" + str( + self.port) + endpoint self.punc_server = TextHttpHandler(punc_server_ip, punc_server_port) def read_wave(self, wavfile_path: str): @@ -117,6 +139,11 @@ class ASRAudioHandler: """ logging.info("send a message to the server") + if self.url is None: + logger.error( + "No punctuation server, please input valid ip and port") + return "" + # 1. send websocket handshake protocal async with websockets.connect(self.url) as ws: # 2. server has already received handshake protocal @@ -125,7 +152,7 @@ class ASRAudioHandler: { "name": "test.wav", "signal": "start", - "nbest": 5 + "nbest": 1 }, sort_keys=True, indent=4, @@ -139,7 +166,9 @@ class ASRAudioHandler: await ws.send(chunk_data.tobytes()) msg = await ws.recv() msg = json.loads(msg) - msg["asr_results"] = self.punc_server.run(msg["asr_results"]) + if self.punc_server and len(msg["partial_result"]) > 0: + msg["partial_result"] = self.punc_server.run( + msg["partial_result"]) logger.info("receive msg={}".format(msg)) # 4. we must send finished signal to the server @@ -157,7 +186,8 @@ class ASRAudioHandler: # 5. decode the bytes to str msg = json.loads(msg) - msg["asr_results"] = self.punc_server.run(msg["asr_results"]) + if self.punc_server: + msg["final_result"] = self.punc_server.run(msg["final_result"]) logger.info("final receive msg={}".format(msg)) result = msg @@ -165,14 +195,39 @@ class ASRAudioHandler: class ASRHttpHandler: - def __init__(self, server_ip="127.0.0.1", port=8090): + def __init__(self, server_ip=None, port=None): + """The ASR client http request + + Args: + server_ip (str, optional): the http asr server ip. Defaults to "127.0.0.1". + port (int, optional): the http asr server port. Defaults to 8090. + """ super().__init__() self.server_ip = server_ip self.port = port - self.url = 'http://' + self.server_ip + ":" + str( - self.port) + '/paddlespeech/asr' + if server_ip is None or port is None: + self.url = None + else: + self.url = 'http://' + self.server_ip + ":" + str( + self.port) + '/paddlespeech/asr' def run(self, input, audio_format, sample_rate, lang): + """Call the http asr to process the audio + + Args: + input (str): the audio file path + audio_format (str): the audio format + sample_rate (str): the audio sample rate + lang (str): the audio language type + + Returns: + str: the final asr result + """ + if self.url is None: + logger.error( + "No punctuation server, please input valid ip and port") + return "" + audio = wav2base64(input) data = { "audio": audio, diff --git a/paddlespeech/server/ws/asr_socket.py b/paddlespeech/server/ws/asr_socket.py index 10967f28..aebe46a2 100644 --- a/paddlespeech/server/ws/asr_socket.py +++ b/paddlespeech/server/ws/asr_socket.py @@ -24,7 +24,7 @@ from paddlespeech.server.engine.engine_pool import get_engine_pool router = APIRouter() -@router.websocket('/ws/asr') +@router.websocket('/paddlespeech/asr/streaming') async def websocket_endpoint(websocket: WebSocket): """PaddleSpeech Online ASR Server api @@ -83,7 +83,7 @@ async def websocket_endpoint(websocket: WebSocket): resp = { "status": "ok", "signal": "finished", - 'asr_results': asr_results + 'final_result': asr_results } await websocket.send_json(resp) break @@ -102,7 +102,7 @@ async def websocket_endpoint(websocket: WebSocket): # return the current period result # if the engine create the vad instance, this connection will have many period results - resp = {'asr_results': asr_results} + resp = {'partial_result': asr_results} await websocket.send_json(resp) except WebSocketDisconnect: pass From 9125cb076d504f3c5e779183ef970acb41d9558e Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Mon, 25 Apr 2022 17:40:46 +0800 Subject: [PATCH 3/3] update the ws asr response, final_result to result, test=doc --- demos/streaming_asr_server/web/templates/index.html | 4 ++-- paddlespeech/server/bin/paddlespeech_client.py | 2 +- paddlespeech/server/utils/audio_handler.py | 12 +++++------- paddlespeech/server/ws/asr_socket.py | 4 ++-- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/demos/streaming_asr_server/web/templates/index.html b/demos/streaming_asr_server/web/templates/index.html index 7aa227fb..56c63080 100644 --- a/demos/streaming_asr_server/web/templates/index.html +++ b/demos/streaming_asr_server/web/templates/index.html @@ -93,7 +93,7 @@ function parseResult(data) { var data = JSON.parse(data) - var result = data.asr_results + var result = data.result console.log(result) $("#resultPanel").html(result) } @@ -152,4 +152,4 @@ - \ No newline at end of file + diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 14847119..715e64a0 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -387,7 +387,7 @@ class ASRClientExecutor(BaseExecutor): punc_server_port=punc_server_port) loop = asyncio.get_event_loop() res = loop.run_until_complete(handler.run(input)) - res = res['final_result'] + res = res['result'] logger.info("asr websocket client finished") else: logger.error(f"Sorry, we have not support protocol: {protocol}," diff --git a/paddlespeech/server/utils/audio_handler.py b/paddlespeech/server/utils/audio_handler.py index 7df4a8e3..3c924d18 100644 --- a/paddlespeech/server/utils/audio_handler.py +++ b/paddlespeech/server/utils/audio_handler.py @@ -54,8 +54,6 @@ class TextHttpHandler: str: punctuation text """ if self.server_ip is None or self.port is None: - logger.warning( - "No punctuation server, please input valid ip and port") return text request = { "text": text, @@ -141,7 +139,7 @@ class ASRWsAudioHandler: if self.url is None: logger.error( - "No punctuation server, please input valid ip and port") + "No asr server, please input valid ip and port") return "" # 1. send websocket handshake protocal @@ -166,9 +164,9 @@ class ASRWsAudioHandler: await ws.send(chunk_data.tobytes()) msg = await ws.recv() msg = json.loads(msg) - if self.punc_server and len(msg["partial_result"]) > 0: - msg["partial_result"] = self.punc_server.run( - msg["partial_result"]) + if self.punc_server and len(msg["result"]) > 0: + msg["result"] = self.punc_server.run( + msg["result"]) logger.info("receive msg={}".format(msg)) # 4. we must send finished signal to the server @@ -187,7 +185,7 @@ class ASRWsAudioHandler: # 5. decode the bytes to str msg = json.loads(msg) if self.punc_server: - msg["final_result"] = self.punc_server.run(msg["final_result"]) + msg["result"] = self.punc_server.run(msg["result"]) logger.info("final receive msg={}".format(msg)) result = msg diff --git a/paddlespeech/server/ws/asr_socket.py b/paddlespeech/server/ws/asr_socket.py index aebe46a2..68686d3d 100644 --- a/paddlespeech/server/ws/asr_socket.py +++ b/paddlespeech/server/ws/asr_socket.py @@ -83,7 +83,7 @@ async def websocket_endpoint(websocket: WebSocket): resp = { "status": "ok", "signal": "finished", - 'final_result': asr_results + 'result': asr_results } await websocket.send_json(resp) break @@ -102,7 +102,7 @@ async def websocket_endpoint(websocket: WebSocket): # return the current period result # if the engine create the vad instance, this connection will have many period results - resp = {'partial_result': asr_results} + resp = {'result': asr_results} await websocket.send_json(resp) except WebSocketDisconnect: pass