commit
d90a277dfa
@ -0,0 +1,7 @@
|
||||
# Static library containing the WebSocket server and client sources.
project(websocket)

add_library(websocket STATIC
    websocket_server.cc
    websocket_client.cc
)

# Linked PUBLIC, so frontend, decoder and nnet are also propagated to any
# target that links against websocket.
target_link_libraries(websocket PUBLIC frontend decoder nnet)
|
@ -0,0 +1,105 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "websocket/websocket_client.h"
|
||||
|
||||
#include "boost/json/src.hpp"
|
||||
|
||||
namespace json = boost::json;
|
||||
|
||||
namespace ppspeech {
|
||||
|
||||
// Stores the target host/port, connects (see Connect()) and then starts the
// thread that runs ReadLoopFunc().
WebSocketClient::WebSocketClient(const std::string& host, int port)
    : host_(host), port_(port) {
    // Connect() performs the handshake; the reader thread is only started
    // afterwards.
    Connect();
    t_ = std::unique_ptr<std::thread>(
        new std::thread(&WebSocketClient::ReadLoopFunc, this));
}
|
||||
|
||||
void WebSocketClient::Connect() {
|
||||
tcp::resolver resolver{ioc_};
|
||||
// Look up the domain name
|
||||
auto const results = resolver.resolve(host_, std::to_string(port_));
|
||||
// Make the connection on the IP address we get from a lookup
|
||||
auto ep = asio::connect(ws_.next_layer(), results);
|
||||
// Update the host_ string. This will provide the value of the
|
||||
// Host HTTP header during the WebSocket handshake.
|
||||
// See https://tools.ietf.org/html/rfc7230#section-5.4
|
||||
std::string host = host_ + ":" + std::to_string(ep.port());
|
||||
// Perform the websocket handshake
|
||||
ws_.handshake(host, "/");
|
||||
}
|
||||
|
||||
// Writes `data` to the WebSocket as a text message.
void WebSocketClient::SendTextData(const std::string& data) {
    ws_.text(true);  // outgoing messages are flagged as text
    auto payload = asio::buffer(data);
    ws_.write(payload);
}
|
||||
|
||||
// Writes `size` bytes starting at `data` to the WebSocket as a binary
// message.
void WebSocketClient::SendBinaryData(const void* data, size_t size) {
    ws_.binary(true);  // outgoing messages are flagged as binary
    auto payload = asio::buffer(data, size);
    ws_.write(payload);
}
|
||||
|
||||
void WebSocketClient::Close() { ws_.close(websocket::close_code::normal); }
|
||||
|
||||
void WebSocketClient::ReadLoopFunc() {
|
||||
try {
|
||||
while (true) {
|
||||
beast::flat_buffer buffer;
|
||||
ws_.read(buffer);
|
||||
std::string message = beast::buffers_to_string(buffer.data());
|
||||
LOG(INFO) << message;
|
||||
CHECK(ws_.got_text());
|
||||
json::object obj = json::parse(message).as_object();
|
||||
if (obj["status"] != "ok") {
|
||||
break;
|
||||
}
|
||||
if (obj["type"] == "final_result") {
|
||||
result_ = obj["result"].as_string().c_str();
|
||||
}
|
||||
if (obj["type"] == "speech_end") {
|
||||
done_ = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (beast::system_error const& se) {
|
||||
// This indicates that the session was closed
|
||||
if (se.code() != websocket::error::closed) {
|
||||
LOG(ERROR) << se.code().message();
|
||||
}
|
||||
} catch (std::exception const& e) {
|
||||
LOG(ERROR) << e.what();
|
||||
}
|
||||
}
|
||||
|
||||
// Waits for the reader thread (ReadLoopFunc) to finish.
//
// Safe to call more than once: std::thread::join() throws std::system_error
// on a thread that is no longer joinable, so the call is skipped in that
// case.
void WebSocketClient::Join() {
    if (t_ != nullptr && t_->joinable()) {
        t_->join();
    }
}
|
||||
|
||||
void WebSocketClient::SendStartSignal() {
|
||||
json::value start_tag = {{"signal", "start"}};
|
||||
std::string start_message = json::serialize(start_tag);
|
||||
this->SendTextData(start_message);
|
||||
}
|
||||
|
||||
void WebSocketClient::SendDataEnd() {
|
||||
json::value end_tag = {{"data", "end"}};
|
||||
std::string end_message = json::serialize(end_tag);
|
||||
this->SendTextData(end_message);
|
||||
}
|
||||
|
||||
void WebSocketClient::SendEndSignal() {
|
||||
json::value end_tag = {{"signal", "end"}};
|
||||
std::string end_message = json::serialize(end_tag);
|
||||
this->SendTextData(end_message);
|
||||
}
|
||||
|
||||
} // namespace ppspeech
|
@ -0,0 +1,55 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once

#include <atomic>
#include <memory>
#include <string>
#include <thread>

#include "base/common.h"

#include "boost/asio/connect.hpp"
#include "boost/asio/ip/tcp.hpp"
#include "boost/beast/core.hpp"
#include "boost/beast/websocket.hpp"
|
||||
|
||||
namespace beast = boost::beast; // from <boost/beast.hpp>
|
||||
namespace http = beast::http; // from <boost/beast/http.hpp>
|
||||
namespace websocket = beast::websocket; // from <boost/beast/websocket.hpp>
|
||||
namespace asio = boost::asio; // from <boost/asio.hpp>
|
||||
using tcp = boost::asio::ip::tcp; // from <boost/asio/ip/tcp.hpp>
|
||||
|
||||
namespace ppspeech {
|
||||
|
||||
class WebSocketClient {
|
||||
public:
|
||||
WebSocketClient(const std::string& host, int port);
|
||||
|
||||
void SendTextData(const std::string& data);
|
||||
void SendBinaryData(const void* data, size_t size);
|
||||
void ReadLoopFunc();
|
||||
void Close();
|
||||
void Join();
|
||||
void SendStartSignal();
|
||||
void SendEndSignal();
|
||||
void SendDataEnd();
|
||||
bool Done() const { return done_; }
|
||||
std::string GetResult() { return result_; }
|
||||
|
||||
private:
|
||||
void Connect();
|
||||
std::string host_;
|
||||
std::string result_;
|
||||
int port_;
|
||||
bool done_ = false;
|
||||
asio::io_context ioc_;
|
||||
websocket::stream<tcp::socket> ws_{ioc_};
|
||||
std::unique_ptr<std::thread> t_{nullptr};
|
||||
};
|
||||
}
|
@ -0,0 +1,192 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "websocket/websocket_server.h"
|
||||
|
||||
#include "base/common.h"
|
||||
#include "boost/json/src.hpp"
|
||||
|
||||
namespace json = boost::json;
|
||||
|
||||
namespace ppspeech {
|
||||
|
||||
// Takes ownership of the accepted TCP socket (moved into the WebSocket
// stream) and keeps a copy of the recognizer resource for this connection.
ConnectionHandler::ConnectionHandler(
    tcp::socket&& socket, const RecognizerResource& recognizer_resource)
    : ws_(std::move(socket)),
      recognizer_resource_(recognizer_resource) {
}
|
||||
|
||||
void ConnectionHandler::OnSpeechStart() {
|
||||
LOG(INFO) << "Recieved speech start signal, start reading speech";
|
||||
got_start_tag_ = true;
|
||||
json::value rv = {{"status", "ok"}, {"type", "server_ready"}};
|
||||
ws_.text(true);
|
||||
ws_.write(asio::buffer(json::serialize(rv)));
|
||||
recognizer_ = std::make_shared<Recognizer>(recognizer_resource_);
|
||||
// Start decoder thread
|
||||
decode_thread_ = std::make_shared<std::thread>(
|
||||
&ConnectionHandler::DecodeThreadFunc, this);
|
||||
}
|
||||
|
||||
void ConnectionHandler::OnSpeechEnd() {
|
||||
LOG(INFO) << "Recieved speech end signal";
|
||||
CHECK(recognizer_ != nullptr);
|
||||
recognizer_->SetFinished();
|
||||
got_end_tag_ = true;
|
||||
}
|
||||
|
||||
void ConnectionHandler::OnFinalResult(const std::string& result) {
|
||||
LOG(INFO) << "Final result: " << result;
|
||||
json::value rv = {
|
||||
{"status", "ok"}, {"type", "final_result"}, {"result", result}};
|
||||
ws_.text(true);
|
||||
ws_.write(asio::buffer(json::serialize(rv)));
|
||||
}
|
||||
|
||||
void ConnectionHandler::OnFinish() {
|
||||
// Send finish tag
|
||||
json::value rv = {{"status", "ok"}, {"type", "speech_end"}};
|
||||
ws_.text(true);
|
||||
ws_.write(asio::buffer(json::serialize(rv)));
|
||||
}
|
||||
|
||||
// Converts one binary message into float samples and feeds them to the
// recognizer. The payload is read as consecutive int16_t values; a trailing
// odd byte is ignored by the integer division.
void ConnectionHandler::OnSpeechData(const beast::flat_buffer& buffer) {
    // Read binary PCM data
    const int16_t* samples =
        static_cast<const int16_t*>(buffer.data().data());
    int num_samples = buffer.size() / sizeof(int16_t);
    kaldi::Vector<kaldi::BaseFloat> pcm_data(num_samples);
    for (int idx = 0; idx < num_samples; ++idx) {
        pcm_data(idx) = static_cast<float>(samples[idx]);
    }
    VLOG(2) << "Recieved " << num_samples << " samples";
    LOG(INFO) << "Recieved " << num_samples << " samples";
    CHECK(recognizer_ != nullptr);
    recognizer_->Accept(pcm_data);
}
|
||||
|
||||
void ConnectionHandler::DecodeThreadFunc() {
|
||||
try {
|
||||
while (true) {
|
||||
recognizer_->Decode();
|
||||
if (recognizer_->IsFinished()) {
|
||||
LOG(INFO) << "enter finish";
|
||||
recognizer_->Decode();
|
||||
LOG(INFO) << "finish";
|
||||
std::string result = recognizer_->GetFinalResult();
|
||||
OnFinalResult(result);
|
||||
OnFinish();
|
||||
stop_recognition_ = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (std::exception const& e) {
|
||||
LOG(ERROR) << e.what();
|
||||
}
|
||||
}
|
||||
|
||||
void ConnectionHandler::OnError(const std::string& message) {
|
||||
json::value rv = {{"status", "failed"}, {"message", message}};
|
||||
ws_.text(true);
|
||||
ws_.write(asio::buffer(json::serialize(rv)));
|
||||
// Close websocket
|
||||
ws_.close(websocket::close_code::normal);
|
||||
}
|
||||
|
||||
void ConnectionHandler::OnText(const std::string& message) {
|
||||
json::value v = json::parse(message);
|
||||
if (v.is_object()) {
|
||||
json::object obj = v.get_object();
|
||||
if (obj.find("signal") != obj.end()) {
|
||||
json::string signal = obj["signal"].as_string();
|
||||
if (signal == "start") {
|
||||
OnSpeechStart();
|
||||
} else if (signal == "end") {
|
||||
OnSpeechEnd();
|
||||
} else {
|
||||
OnError("Unexpected signal type");
|
||||
}
|
||||
} else {
|
||||
OnError("Wrong message header");
|
||||
}
|
||||
} else {
|
||||
OnError("Wrong protocol");
|
||||
}
|
||||
}
|
||||
|
||||
// Session body, run on the per-connection thread started by
// WebSocketServer::Start().
//
// Accepts the WebSocket handshake and then reads messages in a loop:
//  - text messages go to OnText(); the loop ends once the end tag was seen;
//  - binary messages go to OnSpeechData(), are rejected through OnError()
//    when no start signal was seen, and end the loop once stop_recognition_
//    is set.
//
// However the loop ends (normally, connection closed, or any exception), the
// decoding thread is stopped and joined before this function returns. The
// thread holds `this`, and destroying a joinable std::thread calls
// std::terminate, so it must not outlive the handler.
void ConnectionHandler::operator()() {
    try {
        // Accept the websocket handshake
        ws_.accept();
        for (;;) {
            // This buffer will hold the incoming message
            beast::flat_buffer buffer;
            // Read a message
            ws_.read(buffer);
            if (ws_.got_text()) {
                std::string message = beast::buffers_to_string(buffer.data());
                LOG(INFO) << message;
                OnText(message);
                if (got_end_tag_) {
                    break;
                }
            } else {
                if (!got_start_tag_) {
                    OnError("Start signal is expected before binary data");
                } else {
                    if (stop_recognition_) {
                        break;
                    }
                    OnSpeechData(buffer);
                }
            }
        }

        LOG(INFO) << "Read all pcm data, wait for decoding thread";
    } catch (beast::system_error const& se) {
        // websocket::error::closed indicates that the session was closed;
        // other errors are reported.
        if (se.code() != websocket::error::closed) {
            LOG(ERROR) << se.code().message();
        }
    } catch (std::exception const& e) {
        LOG(ERROR) << e.what();
    }

    if (decode_thread_ != nullptr) {
        // The decoding loop only exits once the recognizer reports finished.
        // If the session ended without an end tag and the decoder has not
        // stopped on its own, mark the input as finished BEFORE joining;
        // joining first could block forever.
        const bool decoder_needs_stop = !got_end_tag_ && !stop_recognition_;
        if (decoder_needs_stop && recognizer_ != nullptr) {
            OnSpeechEnd();
        }
        if (decode_thread_->joinable()) {
            decode_thread_->join();
        }
    }
}
|
||||
|
||||
void WebSocketServer::Start() {
|
||||
try {
|
||||
auto const address = asio::ip::make_address("0.0.0.0");
|
||||
tcp::acceptor acceptor{ioc_, {address, static_cast<uint16_t>(port_)}};
|
||||
for (;;) {
|
||||
// This will receive the new connection
|
||||
tcp::socket socket{ioc_};
|
||||
// Block until we get a connection
|
||||
acceptor.accept(socket);
|
||||
// Launch the session, transferring ownership of the socket
|
||||
ConnectionHandler handler(std::move(socket), recognizer_resource_);
|
||||
std::thread t(std::move(handler));
|
||||
t.detach();
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
LOG(FATAL) << e.what();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ppspeech
|
@ -0,0 +1,80 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "base/common.h"
|
||||
|
||||
#include "boost/asio/connect.hpp"
|
||||
#include "boost/asio/ip/tcp.hpp"
|
||||
#include "boost/beast/core.hpp"
|
||||
#include "boost/beast/websocket.hpp"
|
||||
|
||||
#include "decoder/recognizer.h"
|
||||
#include "frontend/audio/feature_pipeline.h"
|
||||
|
||||
namespace beast = boost::beast; // from <boost/beast.hpp>
|
||||
namespace http = beast::http; // from <boost/beast/http.hpp>
|
||||
namespace websocket = beast::websocket; // from <boost/beast/websocket.hpp>
|
||||
namespace asio = boost::asio; // from <boost/asio.hpp>
|
||||
using tcp = boost::asio::ip::tcp; // from <boost/asio/ip/tcp.hpp>
|
||||
|
||||
namespace ppspeech {
|
||||
class ConnectionHandler {
|
||||
public:
|
||||
ConnectionHandler(tcp::socket&& socket,
|
||||
const RecognizerResource& recognizer_resource_);
|
||||
void operator()();
|
||||
|
||||
private:
|
||||
void OnSpeechStart();
|
||||
void OnSpeechEnd();
|
||||
void OnText(const std::string& message);
|
||||
void OnFinish();
|
||||
void OnSpeechData(const beast::flat_buffer& buffer);
|
||||
void OnError(const std::string& message);
|
||||
void OnPartialResult(const std::string& result);
|
||||
void OnFinalResult(const std::string& result);
|
||||
void DecodeThreadFunc();
|
||||
std::string SerializeResult(bool finish);
|
||||
|
||||
bool continuous_decoding_ = false;
|
||||
int nbest_ = 1;
|
||||
websocket::stream<tcp::socket> ws_;
|
||||
RecognizerResource recognizer_resource_;
|
||||
|
||||
bool got_start_tag_ = false;
|
||||
bool got_end_tag_ = false;
|
||||
// When endpoint is detected, stop recognition, and stop receiving data.
|
||||
bool stop_recognition_ = false;
|
||||
std::shared_ptr<ppspeech::Recognizer> recognizer_ = nullptr;
|
||||
std::shared_ptr<std::thread> decode_thread_ = nullptr;
|
||||
};
|
||||
|
||||
class WebSocketServer {
|
||||
public:
|
||||
WebSocketServer(int port, const RecognizerResource& recognizer_resource)
|
||||
: port_(port), recognizer_resource_(recognizer_resource) {}
|
||||
|
||||
void Start();
|
||||
|
||||
private:
|
||||
int port_;
|
||||
RecognizerResource recognizer_resource_;
|
||||
// The io_context is required for all I/O
|
||||
asio::io_context ioc_{1};
|
||||
};
|
||||
|
||||
} // namespace ppspeech
|
Loading…
Reference in new issue