Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement session recovery #18883

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 53 additions & 21 deletions src/app/OperationalDeviceProxy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,10 @@ void OperationalDeviceProxy::MoveToState(State aTargetState)
ChipLogDetail(Controller, "OperationalDeviceProxy[" ChipLogFormatX64 ":" ChipLogFormatX64 "]: State change %d --> %d",
ChipLogValueX64(mPeerId.GetCompressedFabricId()), ChipLogValueX64(mPeerId.GetNodeId()), to_underlying(mState),
to_underlying(aTargetState));

mState = aTargetState;

if (aTargetState != State::Connecting)
if (aTargetState != State::Connecting && aTargetState != State::Recovering)
{
CleanupCASEClient();
}
Expand Down Expand Up @@ -128,6 +129,10 @@ void OperationalDeviceProxy::Connect(Callback::Callback<OnDeviceConnected> * onC
if (!isConnected)
{
err = EstablishConnection();
if (err == CHIP_NO_ERROR)
{
MoveToState(State::Connecting);
}
}

break;
Expand All @@ -139,6 +144,9 @@ void OperationalDeviceProxy::Connect(Callback::Callback<OnDeviceConnected> * onC
isConnected = true;
break;

case State::Recovering:
break;

default:
err = CHIP_ERROR_INCORRECT_STATE;
}
Expand Down Expand Up @@ -189,7 +197,11 @@ void OperationalDeviceProxy::UpdateDeviceData(const Transport::PeerAddress & add
{
MoveToState(State::HasAddress);
err = EstablishConnection();
if (err != CHIP_NO_ERROR)
if (err == CHIP_NO_ERROR)
{
MoveToState(State::Connecting);
}
else
{
DequeueConnectionCallbacks(err);
}
Expand Down Expand Up @@ -223,8 +235,6 @@ CHIP_ERROR OperationalDeviceProxy::EstablishConnection()
return err;
}

MoveToState(State::Connecting);

return CHIP_NO_ERROR;
}

Expand Down Expand Up @@ -285,31 +295,43 @@ void OperationalDeviceProxy::DequeueConnectionCallbacks(CHIP_ERROR error)

void OperationalDeviceProxy::OnSessionEstablishmentError(CHIP_ERROR error)
{
VerifyOrReturn(mState != State::Uninitialized && mState != State::NeedsAddress,
VerifyOrReturn(mState == State::Connecting || mState == State::Recovering,
ChipLogError(Controller, "HandleCASEConnectionFailure was called while the device was not initialized"));

//
// We don't need to reset the state all the way back to NeedsAddress since all that transpired
// was just CASE connection failure. So let's re-use the cached address to re-do CASE again
// if need-be.
//
MoveToState(State::HasAddress);
if (mState == State::Connecting)
{
//
// We don't need to reset the state all the way back to NeedsAddress since all that transpired
// was just CASE connection failure. So let's re-use the cached address to re-do CASE again
// if need-be.
//
Comment on lines +303 to +307
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ignoring the multicast traffic injected for mDNS and only considering the two peers involved in the session, CASE is the expensive part of the operation and address resolution is relatively cheap. Does it really make sense to undertake a potentially lengthy sequence of retries with a potentially stale address?

MoveToState(State::HasAddress);

DequeueConnectionCallbacks(error);
DequeueConnectionCallbacks(error);
}
else if (mState == State::Recovering)
{
mSecureSession.Get().Value()->DispatchSessionEvent(&SessionDelegate::OnRecoveryFailed);
}

// Do not touch device instance anymore; it might have been destroyed by a failure callback.
}

void OperationalDeviceProxy::OnSessionEstablished(const SessionHandle & session)
{
VerifyOrReturn(mState != State::Uninitialized,
VerifyOrReturn(mState == State::Connecting || mState == State::Recovering,
ChipLogError(Controller, "HandleCASEConnected was called while the device was not initialized"));

bool report = (mState == State::Connecting);

if (!mSecureSession.Grab(session))
return; // Got an invalid session, do not change any state

MoveToState(State::SecureConnected);
DequeueConnectionCallbacks(CHIP_NO_ERROR);
mInitParams.sessionManager->ShiftToSession(session);

if (report)
DequeueConnectionCallbacks(CHIP_NO_ERROR);

// Do not touch this instance anymore; it might have been destroyed by a callback.
}
Expand Down Expand Up @@ -345,9 +367,23 @@ void OperationalDeviceProxy::OnFirstMessageDeliveryFailed()
LookupPeerAddress();
}

void OperationalDeviceProxy::OnSessionHang()
void OperationalDeviceProxy::OnRequestRecovery()
{
// TODO: establish a new session
TrySessionRecovery();
}

void OperationalDeviceProxy::TrySessionRecovery()
{
MoveToState(State::Recovering);
CHIP_ERROR err = EstablishConnection();
if (err == CHIP_NO_ERROR)
{
MoveToState(State::Recovering);
}
else
{
mSecureSession.Get().Value()->DispatchSessionEvent(&SessionDelegate::OnRecoveryFailed);
}
}

CHIP_ERROR OperationalDeviceProxy::ShutdownSubscriptions()
Expand All @@ -370,11 +406,7 @@ OperationalDeviceProxy::~OperationalDeviceProxy()
}
}

if (mCASEClient)
{
// Make sure we don't leak it.
mInitParams.clientPool->Release(mCASEClient);
}
MoveToState(State::Uninitialized);
}

CHIP_ERROR OperationalDeviceProxy::LookupPeerAddress()
Expand Down
7 changes: 5 additions & 2 deletions src/app/OperationalDeviceProxy.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,9 @@ class DLL_EXPORT OperationalDeviceProxy : public DeviceProxy,
void OnSessionReleased() override;
// Called when a message is not acked within first retrans timer, try to refresh the peer address
void OnFirstMessageDeliveryFailed() override;
// Called when a connection is hanging. Try to re-establish another session, and shift to the new session when done, the
// Triggered by application layer. Try to re-establish another session, and shift to the new session when done, the
// original session won't be touched during the period.
void OnSessionHang() override;
void OnRequestRecovery() override;

/**
* Mark any open session with the device as expired.
Expand Down Expand Up @@ -218,6 +218,7 @@ class DLL_EXPORT OperationalDeviceProxy : public DeviceProxy,
ResolvingAddress, // Address lookup in progress.
HasAddress, // Have an address, CASE handshake not started yet.
Connecting, // CASE handshake in progress.
Recovering, // CASE session hang, trying to establish a new one, the old session is hanging but left untouched.
SecureConnected, // CASE session established.
};

Expand Down Expand Up @@ -277,6 +278,8 @@ class DLL_EXPORT OperationalDeviceProxy : public DeviceProxy,
* This function will set new IP address, port and MRP retransmission intervals of the device.
*/
void UpdateDeviceData(const Transport::PeerAddress & addr, const ReliableMessageProtocolConfig & config);

void TrySessionRecovery();
};

} // namespace chip
8 changes: 6 additions & 2 deletions src/lib/core/CASEAuthTag.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#pragma once

#include <array>

#include <lib/core/CHIPConfig.h>
#include <lib/core/CHIPEncoding.h>
#include <lib/core/NodeId.h>
Expand All @@ -35,11 +37,11 @@ static constexpr size_t kMaxSubjectCATAttributeCount = CHIP_CONFIG_CERT_MAX_RDN_

struct CATValues
{
CASEAuthTag values[kMaxSubjectCATAttributeCount] = { kUndefinedCAT };
std::array<CASEAuthTag, kMaxSubjectCATAttributeCount> values = { kUndefinedCAT };

/* @brief Returns size of the CAT values array.
*/
static constexpr size_t size() { return ArraySize(values); }
static constexpr size_t size() { return std::tuple_size<decltype(values)>::value; }

/* @brief Returns true if subject input checks against one of the CATs in the values array.
*/
Expand All @@ -58,6 +60,8 @@ struct CATValues
return false;
}

bool operator==(const CATValues & that) const { return values == that.values; }

static constexpr size_t kSerializedLength = kMaxSubjectCATAttributeCount * sizeof(CASEAuthTag);
typedef uint8_t Serialized[kSerializedLength];

Expand Down
1 change: 1 addition & 0 deletions src/protocols/secure_channel/CASEServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ void CASEServer::OnSessionEstablished(const SessionHandle & session)
{
ChipLogProgress(Inet, "CASE Session established to peer: " ChipLogFormatScopedNodeId,
ChipLogValueScopedNodeId(session->GetPeer()));
mSessionManager->ShiftToSession(session);
Cleanup();
}
} // namespace chip
9 changes: 9 additions & 0 deletions src/transport/SecureSession.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,14 @@ Access::SubjectDescriptor SecureSession::GetSubjectDescriptor() const
return subjectDescriptor;
}

void SecureSession::TryShiftToSession(const SessionHandle & session)
{
if (GetSecureSessionType() == SecureSession::Type::kCASE && GetPeer() == session->GetPeer() &&
GetPeerCATs() == session->AsSecureSession()->GetPeerCATs())
{
Session::DoShiftToSession(session);
}
}

} // namespace Transport
} // namespace chip
5 changes: 4 additions & 1 deletion src/transport/SecureSession.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ class SecureSession : public Session, public ReferenceCounted<SecureSession, Sec
NodeId GetPeerNodeId() const { return mPeerNodeId; }
NodeId GetLocalNodeId() const { return mLocalNodeId; }

CATValues GetPeerCATs() const { return mPeerCATs; }
const CATValues & GetPeerCATs() const { return mPeerCATs; }

void SetMRPConfig(const ReliableMessageProtocolConfig & config) { mMRPConfig = config; }

Expand Down Expand Up @@ -220,6 +220,9 @@ class SecureSession : public Session, public ReferenceCounted<SecureSession, Sec

SessionMessageCounter & GetSessionMessageCounter() { return mSessionMessageCounter; }

// This should be a private API, only meant to be called by SessionManager
void TryShiftToSession(const SessionHandle & session);

private:
enum class State : uint8_t
{
Expand Down
16 changes: 16 additions & 0 deletions src/transport/Session.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,21 @@ OutgoingGroupSession * Session::AsOutgoingGroupSession()
return static_cast<OutgoingGroupSession *>(this);
}

void Session::DoShiftToSession(const SessionHandle & session)
{
// Shift to the new session, checks are performed by the subclass implementation which is the caller.
IntrusiveList<SessionHolder>::Iterator iter = mHolders.begin();
while (iter != mHolders.end())
{
// The iterator can be invalid once it is migrated to another session. So we store its next before it is happening.
IntrusiveList<SessionHolder>::Iterator next = iter;
++next;

iter->ShiftToSession(session);

iter = next;
}
}

} // namespace Transport
} // namespace chip
4 changes: 3 additions & 1 deletion src/transport/Session.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,14 @@ class Session
SessionHandle session(*this);
while (!mHolders.Empty())
{
mHolders.begin()->OnSessionReleased(); // OnSessionReleased must remove the item from the linked list
mHolders.begin()->SessionReleased(); // OnSessionReleased must remove the item from the linked list
}
}

void SetFabricIndex(FabricIndex index) { mFabricIndex = index; }

void DoShiftToSession(const SessionHandle & session);

private:
IntrusiveList<SessionHolder> mHolders;
FabricIndex mFabricIndex = kUndefinedFabricIndex;
Expand Down
26 changes: 25 additions & 1 deletion src/transport/SessionDelegate.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,16 @@ class DLL_EXPORT SessionDelegate
* Called when a new secure session to the same peer is established, over the delegate of SessionHolderWithDelegate object. It
* is suggested to shift to the newly created session.
*
* Our security model is built upon Exchanges and Sessions, but not SessionHolders, such that SessionHolders should be able to
* shift to a new sessoin freely. If an application is holding a session which is not intent to be shifted, it can provides
* its shifting policy by override GetNewSessionHandlingPolicy in SessionDelegate. For example SessionHolders inside
* ExchangeContext and PairingSession are not eligible for auto-shifting.
*
* Note: the default implementation orders shifting to the new session, it should be fine for all users, unless the
* SessionHolder object is expected to be sticky to a specified session.
* SessionHolder object is expected to be sticky to a specified session.
*
* Note: the implementation should not modify session pool nor session holders (eg, adding new session, removing old session),
* or else something inconsistent can be happened inside Session::DoShiftToSession.
*/
virtual NewSessionHandlingPolicy GetNewSessionHandlingPolicy() { return NewSessionHandlingPolicy::kShiftToNewSession; }

Expand All @@ -64,6 +72,22 @@ class DLL_EXPORT SessionDelegate
* Note: the implementation must not do anything that will destroy the session or change the SessionHolder.
*/
virtual void OnSessionHang() {}

/**
* @brief
* Called when an application requests to recover a session.
*
* Note: the implementation must not do anything that will destroy the session or change the SessionHolder.
*/
virtual void OnRequestRecovery() {}

/**
* @brief
* Called when a pairing fails to recover a session.
*
* Note: the implementation must not do anything that will destroy the session or change the SessionHolder.
*/
virtual void OnRecoveryFailed() {}
};

} // namespace chip
23 changes: 17 additions & 6 deletions src/transport/SessionHolder.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,23 @@ namespace chip {
* released when the underlying session is released. One must verify it is available before use. The object can be
* created using SessionHandle.Grab()
*/
class SessionHolder : public SessionDelegate, public IntrusiveListNodeBase
class SessionHolder : public IntrusiveListNodeBase
{
public:
SessionHolder() {}
~SessionHolder() override;
virtual ~SessionHolder();

SessionHolder(const SessionHolder &);
SessionHolder(SessionHolder && that);
SessionHolder & operator=(const SessionHolder &);
SessionHolder & operator=(SessionHolder && that);

// Implement SessionDelegate
void OnSessionReleased() override { Release(); }
virtual void SessionReleased() { Release(); }
virtual void ShiftToSession(const SessionHandle & session)
{
Release();
Grab(session);
}

bool Contains(const SessionHandle & session) const
{
Expand All @@ -51,7 +55,7 @@ class SessionHolder : public SessionDelegate, public IntrusiveListNodeBase
bool Grab(const SessionHandle & session);
void Release();

operator bool() const { return mSession.HasValue(); }
explicit operator bool() const { return mSession.HasValue(); }
Optional<SessionHandle> Get() const
{
//
Expand All @@ -78,17 +82,24 @@ class SessionHolderWithDelegate : public SessionHolder
{
public:
SessionHolderWithDelegate(SessionDelegate & delegate) : mDelegate(delegate) {}
SessionHolderWithDelegate(SessionHolder & holder, SessionDelegate & delegate) : SessionHolder(holder), mDelegate(delegate) {}
SessionHolderWithDelegate(const SessionHandle & handle, SessionDelegate & delegate) : mDelegate(delegate) { Grab(handle); }
operator bool() const { return SessionHolder::operator bool(); }

void OnSessionReleased() override
void SessionReleased() override
{
Release();

// Note, the session is already cleared during mDelegate.OnSessionReleased
mDelegate.OnSessionReleased();
}

void ShiftToSession(const SessionHandle & session) override
{
if (mDelegate.GetNewSessionHandlingPolicy() == SessionDelegate::NewSessionHandlingPolicy::kShiftToNewSession)
SessionHolder::ShiftToSession(session);
}

void DispatchSessionEvent(SessionDelegate::Event event) override { (mDelegate.*event)(); }

private:
Expand Down
Loading