From fc96130fdc74002aff4ebd4cd8bea856c421f4ae Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Wed, 27 Apr 2022 12:28:27 +0000 Subject: [PATCH] fix speechx core dump when stop immediately after start --- paddlespeech/cli/cls/infer.py | 4 ++-- paddlespeech/s2t/__init__.py | 2 -- paddlespeech/server/bin/paddlespeech_client.py | 2 +- paddlespeech/server/utils/audio_handler.py | 11 ++++------- speechx/speechx/decoder/ctc_tlg_decoder.cc | 6 ++++++ speechx/speechx/websocket/websocket_server.cc | 17 +++++++++-------- 6 files changed, 22 insertions(+), 20 deletions(-) diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index 1f637a8f..8b90f124 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -21,6 +21,8 @@ from typing import Union import numpy as np import paddle import yaml +from paddleaudio import load +from paddleaudio.features import LogMelSpectrogram from ..executor import BaseExecutor from ..log import logger @@ -28,8 +30,6 @@ from ..utils import cli_register from ..utils import stats_wrapper from .pretrained_models import model_alias from .pretrained_models import pretrained_models -from paddleaudio import load -from paddleaudio.features import LogMelSpectrogram from paddlespeech.s2t.utils.dynamic_import import dynamic_import __all__ = ['CLSExecutor'] diff --git a/paddlespeech/s2t/__init__.py b/paddlespeech/s2t/__init__.py index 29402fc4..2365071f 100644 --- a/paddlespeech/s2t/__init__.py +++ b/paddlespeech/s2t/__init__.py @@ -325,7 +325,6 @@ if not hasattr(paddle.Tensor, 'type_as'): setattr(paddle.static.Variable, 'type_as', type_as) - def to(x: paddle.Tensor, *args, **kwargs) -> paddle.Tensor: assert len(args) == 1 if isinstance(args[0], str): # dtype @@ -372,7 +371,6 @@ if not hasattr(paddle.Tensor, 'tolist'): "register user tolist to paddle.Tensor, remove this when fixed!") setattr(paddle.Tensor, 'tolist', tolist) setattr(paddle.static.Variable, 'tolist', tolist) - ########### hack paddle.nn ############# from paddle.nn import Layer diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index a424c82f..1d8fb5ee 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -521,4 +521,4 @@ class TextClientExecutor(BaseExecutor): res = requests.post(url=url, data=json.dumps(request)) response_dict = res.json() punc_text = response_dict["result"]["punc_text"] - return punc_text \ No newline at end of file + return punc_text diff --git a/paddlespeech/server/utils/audio_handler.py b/paddlespeech/server/utils/audio_handler.py index b9f3b87f..f0ec0eaa 100644 --- a/paddlespeech/server/utils/audio_handler.py +++ b/paddlespeech/server/utils/audio_handler.py @@ -91,8 +91,7 @@ class ASRWsAudioHandler: if url is None or port is None or endpoint is None: self.url = None else: - self.url = "ws://" + self.url + ":" + str( - self.port) + endpoint + self.url = "ws://" + self.url + ":" + str(self.port) + endpoint self.punc_server = TextHttpHandler(punc_server_ip, punc_server_port) logger.info(f"endpoint: {self.url}") @@ -139,8 +138,7 @@ class ASRWsAudioHandler: logging.info("send a message to the server") if self.url is None: - logger.error( - "No asr server, please input valid ip and port") + logger.error("No asr server, please input valid ip and port") return "" # 1. send websocket handshake protocal @@ -167,8 +165,7 @@ class ASRWsAudioHandler: msg = json.loads(msg) if self.punc_server and len(msg["result"]) > 0: - msg["result"] = self.punc_server.run( - msg["result"]) + msg["result"] = self.punc_server.run(msg["result"]) logger.info("client receive msg={}".format(msg)) # 4. we must send finished signal to the server @@ -189,7 +186,7 @@ class ASRWsAudioHandler: if self.punc_server: msg["result"] = self.punc_server.run(msg["result"]) - + logger.info("client final receive msg={}".format(msg)) result = msg diff --git a/speechx/speechx/decoder/ctc_tlg_decoder.cc b/speechx/speechx/decoder/ctc_tlg_decoder.cc index 7b720e7b..02e64316 100644 --- a/speechx/speechx/decoder/ctc_tlg_decoder.cc +++ b/speechx/speechx/decoder/ctc_tlg_decoder.cc @@ -48,6 +48,12 @@ void TLGDecoder::Reset() { } std::string TLGDecoder::GetFinalBestPath() { + if (frame_decoded_size_ == 0) { + // Assertion failed: (this->NumFramesDecoded() > 0 && "You cannot call + // BestPathEnd if no frames were decoded.") + return std::string(""); + } + decoder_->FinalizeDecoding(); kaldi::Lattice lat; kaldi::LatticeWeight weight; diff --git a/speechx/speechx/websocket/websocket_server.cc b/speechx/speechx/websocket/websocket_server.cc index 62d3d9e0..71a9e127 100644 --- a/speechx/speechx/websocket/websocket_server.cc +++ b/speechx/speechx/websocket/websocket_server.cc @@ -27,21 +27,22 @@ ConnectionHandler::ConnectionHandler( : ws_(std::move(socket)), recognizer_resource_(recognizer_resource) {} void ConnectionHandler::OnSpeechStart() { - LOG(INFO) << "Server: Recieved speech start signal, start reading speech"; - got_start_tag_ = true; - json::value rv = {{"status", "ok"}, {"type", "server_ready"}}; - ws_.text(true); - ws_.write(asio::buffer(json::serialize(rv))); recognizer_ = std::make_shared(recognizer_resource_); // Start decoder thread decode_thread_ = std::make_shared( &ConnectionHandler::DecodeThreadFunc, this); + got_start_tag_ = true; + LOG(INFO) << "Server: Recieved speech start signal, start reading speech"; + json::value rv = {{"status", "ok"}, {"type", "server_ready"}}; + ws_.text(true); + ws_.write(asio::buffer(json::serialize(rv))); } void ConnectionHandler::OnSpeechEnd() { LOG(INFO) << "Server: Recieved speech end signal"; - CHECK(recognizer_ != nullptr); - recognizer_->SetFinished(); + if (recognizer_ != nullptr) { + recognizer_->SetFinished(); + } got_end_tag_ = true; } @@ -158,7 +159,7 @@ void ConnectionHandler::operator()() { } } - LOG(INFO) << "Server: Read all pcm data, wait for decoding thread"; + LOG(INFO) << "Server: finished to wait for decoding thread join."; if (decode_thread_ != nullptr) { decode_thread_->join(); }