You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
112 lines
3.9 KiB
112 lines
3.9 KiB
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import os
|
|
import random
|
|
import time
|
|
from time import gmtime, strftime
|
|
import socketserver
|
|
import struct
|
|
import wave
|
|
|
|
from deepspeech.frontend.utility import read_manifest
|
|
|
|
__all__ = ["socket_send", "warm_up_test", "AsrTCPServer", "AsrRequestHandler"]
|
|
|
|
|
|
def socket_send(server_ip: str, server_port: str, data: bytes):
|
|
# Connect to server and send data
|
|
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
sock.connect((server_ip, server_port))
|
|
sent = data
|
|
sock.sendall(struct.pack('>i', len(sent)) + sent)
|
|
print('Speech[length=%d] Sent.' % len(sent))
|
|
# Receive data from the server and shut down
|
|
received = sock.recv(1024)
|
|
print("Recognition Results: {}".format(received.decode('utf8')))
|
|
sock.close()
|
|
|
|
|
|
def warm_up_test(audio_process_handler,
|
|
manifest_path,
|
|
num_test_cases,
|
|
random_seed=0):
|
|
"""Warming-up test."""
|
|
manifest = read_manifest(manifest_path)
|
|
rng = random.Random(random_seed)
|
|
samples = rng.sample(manifest, num_test_cases)
|
|
for idx, sample in enumerate(samples):
|
|
print("Warm-up Test Case %d: %s", idx, sample['audio_filepath'])
|
|
start_time = time.time()
|
|
transcript = audio_process_handler(sample['audio_filepath'])
|
|
finish_time = time.time()
|
|
print("Response Time: %f, Transcript: %s" %
|
|
(finish_time - start_time, transcript))
|
|
|
|
|
|
class AsrTCPServer(socketserver.TCPServer):
|
|
"""The ASR TCP Server."""
|
|
|
|
def __init__(self,
|
|
server_address,
|
|
RequestHandlerClass,
|
|
speech_save_dir,
|
|
audio_process_handler,
|
|
bind_and_activate=True):
|
|
self.speech_save_dir = speech_save_dir
|
|
self.audio_process_handler = audio_process_handler
|
|
socketserver.TCPServer.__init__(
|
|
self, server_address, RequestHandlerClass, bind_and_activate=True)
|
|
|
|
|
|
class AsrRequestHandler(socketserver.BaseRequestHandler):
|
|
"""The ASR request handler."""
|
|
|
|
def handle(self):
|
|
# receive data through TCP socket
|
|
chunk = self.request.recv(1024)
|
|
target_len = struct.unpack('>i', chunk[:4])[0]
|
|
data = chunk[4:]
|
|
while len(data) < target_len:
|
|
chunk = self.request.recv(1024)
|
|
data += chunk
|
|
# write to file
|
|
filename = self._write_to_file(data)
|
|
|
|
print("Received utterance[length=%d] from %s, saved to %s." %
|
|
(len(data), self.client_address[0], filename))
|
|
start_time = time.time()
|
|
transcript = self.server.audio_process_handler(filename)
|
|
finish_time = time.time()
|
|
print("Response Time: %f, Transcript: %s" %
|
|
(finish_time - start_time, transcript))
|
|
self.request.sendall(transcript.encode('utf-8'))
|
|
|
|
def _write_to_file(self, data):
|
|
# prepare save dir and filename
|
|
if not os.path.exists(self.server.speech_save_dir):
|
|
os.mkdir(self.server.speech_save_dir)
|
|
timestamp = strftime("%Y%m%d%H%M%S", gmtime())
|
|
out_filename = os.path.join(
|
|
self.server.speech_save_dir,
|
|
timestamp + "_" + self.client_address[0] + ".wav")
|
|
# write to wav file
|
|
file = wave.open(out_filename, 'wb')
|
|
file.setnchannels(1)
|
|
file.setsampwidth(2)
|
|
file.setframerate(16000)
|
|
file.writeframes(data)
|
|
file.close()
|
|
return out_filename
|