Merge pull request #1732 from SmileGoat/add_websocketlib

[speechx]add websocket lib
pull/1735/head
Hui Zhang 3 years ago committed by GitHub
commit d90a277dfa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,7 @@
project(websocket)
add_library(websocket STATIC
websocket_server.cc
websocket_client.cc
)
target_link_libraries(websocket PUBLIC frontend decoder nnet)

@ -0,0 +1,105 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "websocket/websocket_client.h"
#include "boost/json/src.hpp"
namespace json = boost::json;
namespace ppspeech {
WebSocketClient::WebSocketClient(const std::string& host, int port)
: host_(host), port_(port) {
Connect();
t_.reset(new std::thread(&WebSocketClient::ReadLoopFunc, this));
}
void WebSocketClient::Connect() {
tcp::resolver resolver{ioc_};
// Look up the domain name
auto const results = resolver.resolve(host_, std::to_string(port_));
// Make the connection on the IP address we get from a lookup
auto ep = asio::connect(ws_.next_layer(), results);
// Update the host_ string. This will provide the value of the
// Host HTTP header during the WebSocket handshake.
// See https://tools.ietf.org/html/rfc7230#section-5.4
std::string host = host_ + ":" + std::to_string(ep.port());
// Perform the websocket handshake
ws_.handshake(host, "/");
}
void WebSocketClient::SendTextData(const std::string& data) {
ws_.text(true);
ws_.write(asio::buffer(data));
}
void WebSocketClient::SendBinaryData(const void* data, size_t size) {
ws_.binary(true);
ws_.write(asio::buffer(data, size));
}
void WebSocketClient::Close() { ws_.close(websocket::close_code::normal); }
void WebSocketClient::ReadLoopFunc() {
try {
while (true) {
beast::flat_buffer buffer;
ws_.read(buffer);
std::string message = beast::buffers_to_string(buffer.data());
LOG(INFO) << message;
CHECK(ws_.got_text());
json::object obj = json::parse(message).as_object();
if (obj["status"] != "ok") {
break;
}
if (obj["type"] == "final_result") {
result_ = obj["result"].as_string().c_str();
}
if (obj["type"] == "speech_end") {
done_ = true;
break;
}
}
} catch (beast::system_error const& se) {
// This indicates that the session was closed
if (se.code() != websocket::error::closed) {
LOG(ERROR) << se.code().message();
}
} catch (std::exception const& e) {
LOG(ERROR) << e.what();
}
}
void WebSocketClient::Join() { t_->join(); }
void WebSocketClient::SendStartSignal() {
json::value start_tag = {{"signal", "start"}};
std::string start_message = json::serialize(start_tag);
this->SendTextData(start_message);
}
void WebSocketClient::SendDataEnd() {
json::value end_tag = {{"data", "end"}};
std::string end_message = json::serialize(end_tag);
this->SendTextData(end_message);
}
void WebSocketClient::SendEndSignal() {
json::value end_tag = {{"signal", "end"}};
std::string end_message = json::serialize(end_tag);
this->SendTextData(end_message);
}
} // namespace ppspeech

@ -0,0 +1,55 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "base/common.h"
#include "boost/asio/connect.hpp"
#include "boost/asio/ip/tcp.hpp"
#include "boost/beast/core.hpp"
#include "boost/beast/websocket.hpp"
namespace beast = boost::beast; // from <boost/beast.hpp>
namespace http = beast::http; // from <boost/beast/http.hpp>
namespace websocket = beast::websocket; // from <boost/beast/websocket.hpp>
namespace asio = boost::asio; // from <boost/asio.hpp>
using tcp = boost::asio::ip::tcp; // from <boost/asio/ip/tcp.hpp>
namespace ppspeech {
class WebSocketClient {
public:
WebSocketClient(const std::string& host, int port);
void SendTextData(const std::string& data);
void SendBinaryData(const void* data, size_t size);
void ReadLoopFunc();
void Close();
void Join();
void SendStartSignal();
void SendEndSignal();
void SendDataEnd();
bool Done() const { return done_; }
std::string GetResult() { return result_; }
private:
void Connect();
std::string host_;
std::string result_;
int port_;
bool done_ = false;
asio::io_context ioc_;
websocket::stream<tcp::socket> ws_{ioc_};
std::unique_ptr<std::thread> t_{nullptr};
};
}

@ -0,0 +1,192 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "websocket/websocket_server.h"
#include "base/common.h"
#include "boost/json/src.hpp"
namespace json = boost::json;
namespace ppspeech {
ConnectionHandler::ConnectionHandler(
tcp::socket&& socket, const RecognizerResource& recognizer_resource)
: ws_(std::move(socket)), recognizer_resource_(recognizer_resource) {}
void ConnectionHandler::OnSpeechStart() {
LOG(INFO) << "Recieved speech start signal, start reading speech";
got_start_tag_ = true;
json::value rv = {{"status", "ok"}, {"type", "server_ready"}};
ws_.text(true);
ws_.write(asio::buffer(json::serialize(rv)));
recognizer_ = std::make_shared<Recognizer>(recognizer_resource_);
// Start decoder thread
decode_thread_ = std::make_shared<std::thread>(
&ConnectionHandler::DecodeThreadFunc, this);
}
void ConnectionHandler::OnSpeechEnd() {
LOG(INFO) << "Recieved speech end signal";
CHECK(recognizer_ != nullptr);
recognizer_->SetFinished();
got_end_tag_ = true;
}
void ConnectionHandler::OnFinalResult(const std::string& result) {
LOG(INFO) << "Final result: " << result;
json::value rv = {
{"status", "ok"}, {"type", "final_result"}, {"result", result}};
ws_.text(true);
ws_.write(asio::buffer(json::serialize(rv)));
}
void ConnectionHandler::OnFinish() {
// Send finish tag
json::value rv = {{"status", "ok"}, {"type", "speech_end"}};
ws_.text(true);
ws_.write(asio::buffer(json::serialize(rv)));
}
void ConnectionHandler::OnSpeechData(const beast::flat_buffer& buffer) {
// Read binary PCM data
int num_samples = buffer.size() / sizeof(int16_t);
kaldi::Vector<kaldi::BaseFloat> pcm_data(num_samples);
const int16_t* pdata = static_cast<const int16_t*>(buffer.data().data());
for (int i = 0; i < num_samples; i++) {
pcm_data(i) = static_cast<float>(*pdata);
pdata++;
}
VLOG(2) << "Recieved " << num_samples << " samples";
LOG(INFO) << "Recieved " << num_samples << " samples";
CHECK(recognizer_ != nullptr);
recognizer_->Accept(pcm_data);
}
void ConnectionHandler::DecodeThreadFunc() {
try {
while (true) {
recognizer_->Decode();
if (recognizer_->IsFinished()) {
LOG(INFO) << "enter finish";
recognizer_->Decode();
LOG(INFO) << "finish";
std::string result = recognizer_->GetFinalResult();
OnFinalResult(result);
OnFinish();
stop_recognition_ = true;
break;
}
}
} catch (std::exception const& e) {
LOG(ERROR) << e.what();
}
}
void ConnectionHandler::OnError(const std::string& message) {
json::value rv = {{"status", "failed"}, {"message", message}};
ws_.text(true);
ws_.write(asio::buffer(json::serialize(rv)));
// Close websocket
ws_.close(websocket::close_code::normal);
}
void ConnectionHandler::OnText(const std::string& message) {
json::value v = json::parse(message);
if (v.is_object()) {
json::object obj = v.get_object();
if (obj.find("signal") != obj.end()) {
json::string signal = obj["signal"].as_string();
if (signal == "start") {
OnSpeechStart();
} else if (signal == "end") {
OnSpeechEnd();
} else {
OnError("Unexpected signal type");
}
} else {
OnError("Wrong message header");
}
} else {
OnError("Wrong protocol");
}
}
void ConnectionHandler::operator()() {
try {
// Accept the websocket handshake
ws_.accept();
for (;;) {
// This buffer will hold the incoming message
beast::flat_buffer buffer;
// Read a message
ws_.read(buffer);
if (ws_.got_text()) {
std::string message = beast::buffers_to_string(buffer.data());
LOG(INFO) << message;
OnText(message);
if (got_end_tag_) {
break;
}
} else {
if (!got_start_tag_) {
OnError("Start signal is expected before binary data");
} else {
if (stop_recognition_) {
break;
}
OnSpeechData(buffer);
}
}
}
LOG(INFO) << "Read all pcm data, wait for decoding thread";
if (decode_thread_ != nullptr) {
decode_thread_->join();
}
} catch (beast::system_error const& se) {
// This indicates that the session was closed
if (se.code() != websocket::error::closed) {
if (decode_thread_ != nullptr) {
decode_thread_->join();
}
OnSpeechEnd();
LOG(ERROR) << se.code().message();
}
} catch (std::exception const& e) {
LOG(ERROR) << e.what();
}
}
void WebSocketServer::Start() {
try {
auto const address = asio::ip::make_address("0.0.0.0");
tcp::acceptor acceptor{ioc_, {address, static_cast<uint16_t>(port_)}};
for (;;) {
// This will receive the new connection
tcp::socket socket{ioc_};
// Block until we get a connection
acceptor.accept(socket);
// Launch the session, transferring ownership of the socket
ConnectionHandler handler(std::move(socket), recognizer_resource_);
std::thread t(std::move(handler));
t.detach();
}
} catch (const std::exception& e) {
LOG(FATAL) << e.what();
}
}
} // namespace ppspeech

@ -0,0 +1,80 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "base/common.h"
#include "boost/asio/connect.hpp"
#include "boost/asio/ip/tcp.hpp"
#include "boost/beast/core.hpp"
#include "boost/beast/websocket.hpp"
#include "decoder/recognizer.h"
#include "frontend/audio/feature_pipeline.h"
namespace beast = boost::beast; // from <boost/beast.hpp>
namespace http = beast::http; // from <boost/beast/http.hpp>
namespace websocket = beast::websocket; // from <boost/beast/websocket.hpp>
namespace asio = boost::asio; // from <boost/asio.hpp>
using tcp = boost::asio::ip::tcp; // from <boost/asio/ip/tcp.hpp>
namespace ppspeech {
class ConnectionHandler {
public:
ConnectionHandler(tcp::socket&& socket,
const RecognizerResource& recognizer_resource_);
void operator()();
private:
void OnSpeechStart();
void OnSpeechEnd();
void OnText(const std::string& message);
void OnFinish();
void OnSpeechData(const beast::flat_buffer& buffer);
void OnError(const std::string& message);
void OnPartialResult(const std::string& result);
void OnFinalResult(const std::string& result);
void DecodeThreadFunc();
std::string SerializeResult(bool finish);
bool continuous_decoding_ = false;
int nbest_ = 1;
websocket::stream<tcp::socket> ws_;
RecognizerResource recognizer_resource_;
bool got_start_tag_ = false;
bool got_end_tag_ = false;
// When endpoint is detected, stop recognition, and stop receiving data.
bool stop_recognition_ = false;
std::shared_ptr<ppspeech::Recognizer> recognizer_ = nullptr;
std::shared_ptr<std::thread> decode_thread_ = nullptr;
};
class WebSocketServer {
public:
WebSocketServer(int port, const RecognizerResource& recognizer_resource)
: port_(port), recognizer_resource_(recognizer_resource) {}
void Start();
private:
int port_;
RecognizerResource recognizer_resource_;
// The io_context is required for all I/O
asio::io_context ioc_{1};
};
} // namespace ppspeech
Loading…
Cancel
Save