mirror of
https://github.com/Icinga/icinga2.git
synced 2025-09-26 19:18:48 +02:00
PR #7445 incorrectly assumed that a peer that had already disconnected and never reconnected was due to the endpoint client being dropped after a successful socket shutdown. However, the issue at that time was that there was not a single timeout guards that could cancel the `async_shutdown` call, petentially blocking indefinetely. Although removing the client from cache early might have allowed the endpoint to reconnect, it did not resolve the underlying problem. Now that we have a proper cancellation timeout, we can wait until the currently used socket is fully closed before dropping the client from our cache. When our socket termination works reliably, the `ApiListener` reconnect timer should attempt to reconnect this endpoint after the next tick. Additionally, we now have logs both for before and after socket termination, which may help identify if it is hanging somewhere in between.
371 lines
10 KiB
C++
371 lines
10 KiB
C++
/* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */
|
|
|
|
#include "remote/jsonrpcconnection.hpp"
|
|
#include "remote/apilistener.hpp"
|
|
#include "remote/apifunction.hpp"
|
|
#include "remote/jsonrpc.hpp"
|
|
#include "base/defer.hpp"
|
|
#include "base/configtype.hpp"
|
|
#include "base/io-engine.hpp"
|
|
#include "base/json.hpp"
|
|
#include "base/objectlock.hpp"
|
|
#include "base/utility.hpp"
|
|
#include "base/logger.hpp"
|
|
#include "base/exception.hpp"
|
|
#include "base/convert.hpp"
|
|
#include "base/tlsstream.hpp"
|
|
#include <memory>
|
|
#include <utility>
|
|
#include <boost/asio/io_context.hpp>
|
|
#include <boost/asio/spawn.hpp>
|
|
#include <boost/date_time/posix_time/posix_time_duration.hpp>
|
|
#include <boost/system/system_error.hpp>
|
|
#include <boost/thread/once.hpp>
|
|
|
|
using namespace icinga;
|
|
|
|
static Value SetLogPositionHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
|
|
REGISTER_APIFUNCTION(SetLogPosition, log, &SetLogPositionHandler);
|
|
|
|
static RingBuffer l_TaskStats (15 * 60);
|
|
|
|
JsonRpcConnection::JsonRpcConnection(const String& identity, bool authenticated,
|
|
const Shared<AsioTlsStream>::Ptr& stream, ConnectionRole role)
|
|
: JsonRpcConnection(identity, authenticated, stream, role, IoEngine::Get().GetIoContext())
|
|
{
|
|
}
|
|
|
|
JsonRpcConnection::JsonRpcConnection(const String& identity, bool authenticated,
|
|
const Shared<AsioTlsStream>::Ptr& stream, ConnectionRole role, boost::asio::io_context& io)
|
|
: m_Identity(identity), m_Authenticated(authenticated), m_Stream(stream), m_Role(role),
|
|
m_Timestamp(Utility::GetTime()), m_Seen(Utility::GetTime()), m_NextHeartbeat(0), m_IoStrand(io),
|
|
m_OutgoingMessagesQueued(io), m_WriterDone(io), m_ShuttingDown(false),
|
|
m_CheckLivenessTimer(io), m_HeartbeatTimer(io)
|
|
{
|
|
if (authenticated)
|
|
m_Endpoint = Endpoint::GetByName(identity);
|
|
}
|
|
|
|
void JsonRpcConnection::Start()
|
|
{
|
|
namespace asio = boost::asio;
|
|
|
|
JsonRpcConnection::Ptr keepAlive (this);
|
|
|
|
IoEngine::SpawnCoroutine(m_IoStrand, [this, keepAlive](asio::yield_context yc) { HandleIncomingMessages(yc); });
|
|
IoEngine::SpawnCoroutine(m_IoStrand, [this, keepAlive](asio::yield_context yc) { WriteOutgoingMessages(yc); });
|
|
IoEngine::SpawnCoroutine(m_IoStrand, [this, keepAlive](asio::yield_context yc) { HandleAndWriteHeartbeats(yc); });
|
|
IoEngine::SpawnCoroutine(m_IoStrand, [this, keepAlive](asio::yield_context yc) { CheckLiveness(yc); });
|
|
}
|
|
|
|
void JsonRpcConnection::HandleIncomingMessages(boost::asio::yield_context yc)
|
|
{
|
|
m_Stream->next_layer().SetSeen(&m_Seen);
|
|
|
|
while (!m_ShuttingDown) {
|
|
String message;
|
|
|
|
try {
|
|
message = JsonRpc::ReadMessage(m_Stream, yc, m_Endpoint ? -1 : 1024 * 1024);
|
|
} catch (const std::exception& ex) {
|
|
Log(m_ShuttingDown ? LogDebug : LogNotice, "JsonRpcConnection")
|
|
<< "Error while reading JSON-RPC message for identity '" << m_Identity
|
|
<< "': " << DiagnosticInformation(ex);
|
|
|
|
break;
|
|
}
|
|
|
|
m_Seen = Utility::GetTime();
|
|
|
|
try {
|
|
CpuBoundWork handleMessage (yc);
|
|
|
|
MessageHandler(message);
|
|
|
|
l_TaskStats.InsertValue(Utility::GetTime(), 1);
|
|
} catch (const std::exception& ex) {
|
|
Log(m_ShuttingDown ? LogDebug : LogWarning, "JsonRpcConnection")
|
|
<< "Error while processing JSON-RPC message for identity '" << m_Identity
|
|
<< "': " << DiagnosticInformation(ex);
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
Disconnect();
|
|
}
|
|
|
|
void JsonRpcConnection::WriteOutgoingMessages(boost::asio::yield_context yc)
|
|
{
|
|
Defer signalWriterDone ([this]() { m_WriterDone.Set(); });
|
|
|
|
do {
|
|
m_OutgoingMessagesQueued.Wait(yc);
|
|
|
|
auto queue (std::move(m_OutgoingMessagesQueue));
|
|
|
|
m_OutgoingMessagesQueue.clear();
|
|
m_OutgoingMessagesQueued.Clear();
|
|
|
|
if (!queue.empty()) {
|
|
try {
|
|
for (auto& message : queue) {
|
|
size_t bytesSent = JsonRpc::SendRawMessage(m_Stream, message, yc);
|
|
|
|
if (m_Endpoint) {
|
|
m_Endpoint->AddMessageSent(bytesSent);
|
|
}
|
|
}
|
|
|
|
m_Stream->async_flush(yc);
|
|
} catch (const std::exception& ex) {
|
|
Log(m_ShuttingDown ? LogDebug : LogWarning, "JsonRpcConnection")
|
|
<< "Error while sending JSON-RPC message for identity '"
|
|
<< m_Identity << "'\n" << DiagnosticInformation(ex);
|
|
|
|
break;
|
|
}
|
|
}
|
|
} while (!m_ShuttingDown);
|
|
|
|
Disconnect();
|
|
}
|
|
|
|
double JsonRpcConnection::GetTimestamp() const
|
|
{
|
|
return m_Timestamp;
|
|
}
|
|
|
|
String JsonRpcConnection::GetIdentity() const
|
|
{
|
|
return m_Identity;
|
|
}
|
|
|
|
bool JsonRpcConnection::IsAuthenticated() const
|
|
{
|
|
return m_Authenticated;
|
|
}
|
|
|
|
Endpoint::Ptr JsonRpcConnection::GetEndpoint() const
|
|
{
|
|
return m_Endpoint;
|
|
}
|
|
|
|
Shared<AsioTlsStream>::Ptr JsonRpcConnection::GetStream() const
|
|
{
|
|
return m_Stream;
|
|
}
|
|
|
|
ConnectionRole JsonRpcConnection::GetRole() const
|
|
{
|
|
return m_Role;
|
|
}
|
|
|
|
void JsonRpcConnection::SendMessage(const Dictionary::Ptr& message)
|
|
{
|
|
if (m_ShuttingDown) {
|
|
BOOST_THROW_EXCEPTION(std::runtime_error("Cannot send message to already disconnected API client '" + GetIdentity() + "'!"));
|
|
}
|
|
|
|
Ptr keepAlive (this);
|
|
|
|
m_IoStrand.post([this, keepAlive, message]() { SendMessageInternal(message); });
|
|
}
|
|
|
|
void JsonRpcConnection::SendRawMessage(const String& message)
|
|
{
|
|
if (m_ShuttingDown) {
|
|
BOOST_THROW_EXCEPTION(std::runtime_error("Cannot send message to already disconnected API client '" + GetIdentity() + "'!"));
|
|
}
|
|
|
|
Ptr keepAlive (this);
|
|
|
|
m_IoStrand.post([this, keepAlive, message]() {
|
|
if (m_ShuttingDown) {
|
|
return;
|
|
}
|
|
|
|
m_OutgoingMessagesQueue.emplace_back(message);
|
|
m_OutgoingMessagesQueued.Set();
|
|
});
|
|
}
|
|
|
|
void JsonRpcConnection::SendMessageInternal(const Dictionary::Ptr& message)
|
|
{
|
|
if (m_ShuttingDown) {
|
|
return;
|
|
}
|
|
|
|
m_OutgoingMessagesQueue.emplace_back(JsonEncode(message));
|
|
m_OutgoingMessagesQueued.Set();
|
|
}
|
|
|
|
void JsonRpcConnection::Disconnect()
|
|
{
|
|
namespace asio = boost::asio;
|
|
|
|
if (!m_ShuttingDown.exchange(true)) {
|
|
JsonRpcConnection::Ptr keepAlive (this);
|
|
|
|
IoEngine::SpawnCoroutine(m_IoStrand, [this, keepAlive](asio::yield_context yc) {
|
|
Log(LogWarning, "JsonRpcConnection")
|
|
<< "API client disconnected for identity '" << m_Identity << "'";
|
|
|
|
m_OutgoingMessagesQueued.Set();
|
|
|
|
m_WriterDone.Wait(yc);
|
|
|
|
m_CheckLivenessTimer.cancel();
|
|
m_HeartbeatTimer.cancel();
|
|
|
|
m_Stream->GracefulDisconnect(m_IoStrand, yc);
|
|
|
|
if (m_Endpoint) {
|
|
m_Endpoint->RemoveClient(this);
|
|
} else {
|
|
ApiListener::GetInstance()->RemoveAnonymousClient(this);
|
|
}
|
|
});
|
|
}
|
|
}
|
|
|
|
void JsonRpcConnection::MessageHandler(const String& jsonString)
|
|
{
|
|
Dictionary::Ptr message = JsonRpc::DecodeMessage(jsonString);
|
|
|
|
if (m_Endpoint && message->Contains("ts")) {
|
|
double ts = message->Get("ts");
|
|
|
|
/* ignore old messages */
|
|
if (ts < m_Endpoint->GetRemoteLogPosition())
|
|
return;
|
|
|
|
m_Endpoint->SetRemoteLogPosition(ts);
|
|
}
|
|
|
|
MessageOrigin::Ptr origin = new MessageOrigin();
|
|
origin->FromClient = this;
|
|
|
|
if (m_Endpoint) {
|
|
if (m_Endpoint->GetZone() != Zone::GetLocalZone())
|
|
origin->FromZone = m_Endpoint->GetZone();
|
|
else
|
|
origin->FromZone = Zone::GetByName(message->Get("originZone"));
|
|
|
|
m_Endpoint->AddMessageReceived(jsonString.GetLength());
|
|
}
|
|
|
|
Value vmethod;
|
|
|
|
if (!message->Get("method", &vmethod)) {
|
|
Value vid;
|
|
|
|
if (!message->Get("id", &vid))
|
|
return;
|
|
|
|
Log(LogWarning, "JsonRpcConnection",
|
|
"We received a JSON-RPC response message. This should never happen because we're only ever sending notifications.");
|
|
|
|
return;
|
|
}
|
|
|
|
String method = vmethod;
|
|
|
|
Log(LogNotice, "JsonRpcConnection")
|
|
<< "Received '" << method << "' message from identity '" << m_Identity << "'.";
|
|
|
|
Dictionary::Ptr resultMessage = new Dictionary();
|
|
|
|
try {
|
|
ApiFunction::Ptr afunc = ApiFunction::GetByName(method);
|
|
|
|
if (!afunc) {
|
|
Log(LogNotice, "JsonRpcConnection")
|
|
<< "Call to non-existent function '" << method << "' from endpoint '" << m_Identity << "'.";
|
|
} else {
|
|
Dictionary::Ptr params = message->Get("params");
|
|
if (params)
|
|
resultMessage->Set("result", afunc->Invoke(origin, params));
|
|
else
|
|
resultMessage->Set("result", Empty);
|
|
}
|
|
} catch (const std::exception& ex) {
|
|
/* TODO: Add a user readable error message for the remote caller */
|
|
String diagInfo = DiagnosticInformation(ex);
|
|
resultMessage->Set("error", diagInfo);
|
|
Log(LogWarning, "JsonRpcConnection")
|
|
<< "Error while processing message for identity '" << m_Identity << "'\n" << diagInfo;
|
|
}
|
|
|
|
if (message->Contains("id")) {
|
|
resultMessage->Set("jsonrpc", "2.0");
|
|
resultMessage->Set("id", message->Get("id"));
|
|
|
|
SendMessageInternal(resultMessage);
|
|
}
|
|
}
|
|
|
|
Value SetLogPositionHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params)
|
|
{
|
|
double log_position = params->Get("log_position");
|
|
Endpoint::Ptr endpoint = origin->FromClient->GetEndpoint();
|
|
|
|
if (!endpoint)
|
|
return Empty;
|
|
|
|
if (log_position > endpoint->GetLocalLogPosition())
|
|
endpoint->SetLocalLogPosition(log_position);
|
|
|
|
return Empty;
|
|
}
|
|
|
|
void JsonRpcConnection::CheckLiveness(boost::asio::yield_context yc)
|
|
{
|
|
boost::system::error_code ec;
|
|
|
|
if (!m_Authenticated) {
|
|
/* Anonymous connections are normally only used for requesting a certificate and are closed after this request
|
|
* is received. However, the request is only sent if the child has successfully verified the certificate of its
|
|
* parent so that it is an authenticated connection from its perspective. In case this verification fails, both
|
|
* ends view it as an anonymous connection and never actually use it but attempt a reconnect after 10 seconds
|
|
* leaking the connection. Therefore close it after a timeout.
|
|
*/
|
|
|
|
m_CheckLivenessTimer.expires_from_now(boost::posix_time::seconds(10));
|
|
m_CheckLivenessTimer.async_wait(yc[ec]);
|
|
|
|
if (m_ShuttingDown) {
|
|
return;
|
|
}
|
|
|
|
auto remote (m_Stream->lowest_layer().remote_endpoint());
|
|
|
|
Log(LogInformation, "JsonRpcConnection")
|
|
<< "Closing anonymous connection [" << remote.address() << "]:" << remote.port() << " after 10 seconds.";
|
|
|
|
Disconnect();
|
|
} else {
|
|
for (;;) {
|
|
m_CheckLivenessTimer.expires_from_now(boost::posix_time::seconds(30));
|
|
m_CheckLivenessTimer.async_wait(yc[ec]);
|
|
|
|
if (m_ShuttingDown) {
|
|
break;
|
|
}
|
|
|
|
if (m_Seen < Utility::GetTime() - 60 && (!m_Endpoint || !m_Endpoint->GetSyncing())) {
|
|
Log(LogInformation, "JsonRpcConnection")
|
|
<< "No messages for identity '" << m_Identity << "' have been received in the last 60 seconds.";
|
|
|
|
Disconnect();
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
double JsonRpcConnection::GetWorkQueueRate()
|
|
{
|
|
return l_TaskStats.UpdateAndGetValues(Utility::GetTime(), 60) / 60.0;
|
|
}
|