# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import random import time from time import gmtime, strftime import socketserver import struct import wave from deepspeech.frontend.utility import read_manifest __all__ = ["socket_send", "warm_up_test", "AsrTCPServer", "AsrRequestHandler"] def socket_send(server_ip: str, server_port: str, data: bytes): # Connect to server and send data sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((server_ip, server_port)) sent = data sock.sendall(struct.pack('>i', len(sent)) + sent) print('Speech[length=%d] Sent.' % len(sent)) # Receive data from the server and shut down received = sock.recv(1024) print("Recognition Results: {}".format(received.decode('utf8'))) sock.close() def warm_up_test(audio_process_handler, manifest_path, num_test_cases, random_seed=0): """Warming-up test.""" manifest = read_manifest(manifest_path) rng = random.Random(random_seed) samples = rng.sample(manifest, num_test_cases) for idx, sample in enumerate(samples): print("Warm-up Test Case %d: %s", idx, sample['audio_filepath']) start_time = time.time() transcript = audio_process_handler(sample['audio_filepath']) finish_time = time.time() print("Response Time: %f, Transcript: %s" % (finish_time - start_time, transcript)) class AsrTCPServer(socketserver.TCPServer): """The ASR TCP Server.""" def __init__(self, server_address, RequestHandlerClass, speech_save_dir, audio_process_handler, bind_and_activate=True): self.speech_save_dir = speech_save_dir self.audio_process_handler = audio_process_handler socketserver.TCPServer.__init__( self, server_address, RequestHandlerClass, bind_and_activate=True) class AsrRequestHandler(socketserver.BaseRequestHandler): """The ASR request handler.""" def handle(self): # receive data through TCP socket chunk = self.request.recv(1024) target_len = struct.unpack('>i', chunk[:4])[0] data = chunk[4:] while len(data) < target_len: chunk = self.request.recv(1024) data += chunk # write to file filename = self._write_to_file(data) print("Received utterance[length=%d] from %s, saved to %s." % (len(data), self.client_address[0], filename)) start_time = time.time() transcript = self.server.audio_process_handler(filename) finish_time = time.time() print("Response Time: %f, Transcript: %s" % (finish_time - start_time, transcript)) self.request.sendall(transcript.encode('utf-8')) def _write_to_file(self, data): # prepare save dir and filename if not os.path.exists(self.server.speech_save_dir): os.mkdir(self.server.speech_save_dir) timestamp = strftime("%Y%m%d%H%M%S", gmtime()) out_filename = os.path.join( self.server.speech_save_dir, timestamp + "_" + self.client_address[0] + ".wav") # write to wav file file = wave.open(out_filename, 'wb') file.setnchannels(1) file.setsampwidth(2) file.setframerate(16000) file.writeframes(data) file.close() return out_filename