diff --git a/paddlespeech/server/utils/audio_handler.py b/paddlespeech/server/utils/audio_handler.py index 3a864a78..dce7d09d 100644 --- a/paddlespeech/server/utils/audio_handler.py +++ b/paddlespeech/server/utils/audio_handler.py @@ -1,28 +1,53 @@ -import numpy as np -import logging -import argparse -import asyncio -import codecs +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json import logging -import os import numpy as np import soundfile import websockets + from paddlespeech.cli.log import logger + + class ASRAudioHandler: def __init__(self, url="127.0.0.1", port=8090): + """PaddleSpeech Online ASR Server Client audio handler + Online asr server uses the websocket protocol + Args: + url (str, optional): the server ip. Defaults to "127.0.0.1". + port (int, optional): the server port. Defaults to 8090. 
+ """ self.url = url self.port = port self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr" def read_wave(self, wavfile_path: str): + """read the audio file from specific wavfile path + + Args: + wavfile_path (str): the audio wavfile, + we assume that audio sample rate matches the model + + Yields: + numpy.array: the small package of audio pcm data + """ samples, sample_rate = soundfile.read(wavfile_path, dtype='int16') x_len = len(samples) chunk_size = 85 * 16 #80ms, sample_rate = 16kHz - if x_len % chunk_size!= 0: + if x_len % chunk_size != 0: padding_len_x = chunk_size - x_len % chunk_size else: padding_len_x = 0 @@ -40,11 +65,19 @@ class ASRAudioHandler: yield x_chunk async def run(self, wavfile_path: str): + """Send an audio file to the online server + + Args: + wavfile_path (str): audio path + + Returns: + object: the final asr result message, decoded from JSON + """ logging.info("send a message to the server") - # self.read_wave() - # send websocket handshake protocal + + # 1. send websocket handshake protocol async with websockets.connect(self.url) as ws: - # server has already received handshake protocal + # 2. server has already received handshake protocol # client start to send the command audio_info = json.dumps( { @@ -59,14 +92,14 @@ class ASRAudioHandler: msg = await ws.recv() logger.info("receive msg={}".format(msg)) - # send chunk audio data to engine + # 3. send chunk audio data to engine for chunk_data in self.read_wave(wavfile_path): await ws.send(chunk_data.tobytes()) msg = await ws.recv() msg = json.loads(msg) logger.info("receive msg={}".format(msg)) - # finished + # 4. we must send finished signal to the server audio_info = json.dumps( { "name": "test.wav", @@ -78,9 +111,9 @@ class ASRAudioHandler: separators=(',', ': ')) await ws.send(audio_info) msg = await ws.recv() - - # decode the bytes to str + + # 5. 
decode the bytes to str msg = json.loads(msg) logger.info("final receive msg={}".format(msg)) result = msg - return result \ No newline at end of file + return result