Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add operational resolution retries in OperationalSessionSetup. #25991

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 54 additions & 5 deletions src/app/OperationalSessionSetup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,10 @@ CHIP_ERROR OperationalSessionSetup::LookupPeerAddress()
{
++mAttemptsDone;
}
if (mResolveAttemptsAllowed > 0)
{
--mResolveAttemptsAllowed;
}
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

// NOTE: This is public API that can be used to update our stored peer
Expand Down Expand Up @@ -502,9 +506,44 @@ void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerI
ChipLogError(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: operational discovery failed: %" CHIP_ERROR_FORMAT,
mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), reason.Format());

// Does it make sense to ScheduleSessionSetupReattempt() here? DNS-SD
// resolution has its own retry/backoff mechanisms, so if it's failed we
// have already done a lot of that.
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// If we're in a mode where we would generally retry CASE, retry operational
// discovery once. That allows us to more-gracefully handle broken networks
// where multicast DNS does not actually work and hence only the initial
// unicast DNS-SD queries get a response.
//
// We check for State::ResolvingAddress just in case in the meantime
// something weird happened and we are no longer trying to resolve an
// address.
if (mState == State::ResolvingAddress && mResolveAttemptsAllowed > 0)
{
ChipLogProgress(Discovery, "Retrying operational DNS-SD discovery. Attempts remaining: %u", mResolveAttemptsAllowed);

// Pretend like our previous attempt (i.e. call to LookupPeerAddress)
// has not happened for purposes of the generic attempt counters, so we
// don't mess up the counters for our actual CASE retry logic.
if (mRemainingAttempts < UINT8_MAX)
{
++mRemainingAttempts;
}
if (mAttemptsDone > 0)
{
--mAttemptsDone;
}

CHIP_ERROR err = LookupPeerAddress();
if (err == CHIP_NO_ERROR)
{
// We need to notify our consumer that the resolve will take more
// time, but we don't actually know how much time it will take,
// because the resolver does not expose that information. Just use
// one minute to be safe.
using namespace chip::System::Clock::Literals;
NotifyRetryHandlers(reason, 60_s16);
return;
}
}
#endif

// No need to modify any variables in `this` since call below releases `this`.
DequeueConnectionCallbacks(reason);
Expand All @@ -531,6 +570,11 @@ void OperationalSessionSetup::UpdateAttemptCount(uint8_t attemptCount)
{
mRemainingAttempts = attemptCount;
}

if (attemptCount > mResolveAttemptsAllowed)
{
mResolveAttemptsAllowed = attemptCount;
}
}

CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt(System::Clock::Seconds16 & timerDelay)
Expand Down Expand Up @@ -619,11 +663,16 @@ void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, const Reliab
System::Clock::Timeout messageTimeout = CASESession::ComputeSigma1ResponseTimeout(remoteMrpConfig);
auto timeoutSecs = std::chrono::duration_cast<System::Clock::Seconds16>(messageTimeout);
// Add 1 second in case we had fractional milliseconds in messageTimeout.
timeoutSecs += System::Clock::Seconds16(1);
using namespace chip::System::Clock::Literals;
NotifyRetryHandlers(error, timeoutSecs + 1_s16 + retryDelay);
}

void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, System::Clock::Seconds16 timeoutEstimate)
{
for (auto * item = mConnectionRetry.First(); item && item != &mConnectionRetry; item = item->mNext)
{
auto cb = Callback::Callback<OnDeviceConnectionRetry>::FromCancelable(item);
cb->mCall(cb->mContext, mPeerId, error, timeoutSecs + retryDelay);
cb->mCall(cb->mContext, mPeerId, error, timeoutEstimate);
}
}
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
Expand Down
8 changes: 8 additions & 0 deletions src/app/OperationalSessionSetup.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,8 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate,
uint8_t mRemainingAttempts = 0;
uint8_t mAttemptsDone = 0;

uint8_t mResolveAttemptsAllowed = 0;

Callback::CallbackDeque mConnectionRetry;
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

Expand Down Expand Up @@ -354,6 +356,12 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate,
*/
void NotifyRetryHandlers(CHIP_ERROR error, const ReliableMessageProtocolConfig & remoteMrpConfig,
System::Clock::Seconds16 retryDelay);

/**
* A version of NotifyRetryHandlers that passes in a retry timeout estimate
* directly.
*/
void NotifyRetryHandlers(CHIP_ERROR error, System::Clock::Seconds16 timeoutEstimate);
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
};

Expand Down