From 293c89a2f87838e41bf6dd017fda43e38be8d040 Mon Sep 17 00:00:00 2001 From: "Irene Siu (Apple)" Date: Thu, 14 Apr 2022 11:23:00 -0700 Subject: [PATCH] Cleanup flow of error handling and retries for OTA updates (#17072) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * - Added CHIP_ERROR_MAX_RETRY_EXCEEDED. Changed UpdateNotFound() to return CHIP_ERROR value. - Reversed order of state change call and action calls. * - Remove un-used GenericOTARequestorDriver::HandleError() * - Added GenericOTARequestorDriver::ScheduleRetry() * - Move DefaultOTARequestorDriver::ScheduleRetry() from public to protected API - Re-instate DefaultOTARequestorDriver::HandleError() to handle re-tries on errors. * scripts/helpers/restyle-diff.sh * - Changed mServer NULL ptr check to VerifyOrDie() check - If UpdateFailureState::kQuerying error is CHIP_ERROR_BUFFER_TOO_SMALL, set to idle. Else retry. * - Remove un-used mUpdateNotFoundReason and mRetryDelaySec from DefaultOTARequestorDriver * - In DefaultOTARequestorDriver::SendQueryImage(), change state to currentUpdateState * - Renamed ScheduleRetry to ScheduleQueryRetry() - Updated CHIPError.cpp for CHIP_ERROR_MAX_RETRY_EXCEEDED - In DefaultOTARequestorDriver::ScheduleQueryRetry(), moved VerifyOrDie NULL check on mRequestor into the block in which it is used. - Added function header comment for ScheduleQueryRetry() * - In DefaultOTARequestor::OnQueryImageResponse(), moved definition of status into OTAQueryStatus::kBusy and OTAQueryStatus::kNotAvailable blocks. - Removed un-used UpdateFailureState::kAwaitingNextAction enum - In DefaultOTARequestor::OnQueryImageResponse(), for case OTAQueryStatus::kNotAvailable, transition to idle regardless of return value of UpdateNotFound() so that we can perform the next Query * - Added CHIP_ERROR_MAX_RETRY_EXCEEDED to TestCHIPErrorStr.cpp - Removed UpdateFailureState::kIdle case from DefaultOTARequestorDriver::HandleError() - Removed ‘Ignore error and transition to Idle’ comments from DefaultOTARequestorDriver::HandleError() - In DefaultOTARequestorDriver::ScheduleQueryRetry(), moved definition of providerLocation into the block where it’s used * - In ExtendedOTARequestorDriver::UpdateAvailable(), added HandleError() call back. - In enum class UpdateFailureState, removed kIdle - In DefaultOTARequestorDriver::ScheduleQueryRetry(), removed status and willTryAnotherQuery variables. * - In DefaultOTARequestorDriver::UpdateNotFound()< reduced code by calling ScheduleQueryRetry() * - Added CHIP_ERROR_PROVIDER_LIST_EXHAUSTED error case to be used in DefaultOTARequestorDriver::ScheduleQueryRetry() * - Remove DefaultOTARequestorDriver::HandleError() * Remove un-used UpdateFailureState enum class * In DefaultOTARequestorDriver::ScheduleQueryRetry(), add status variable to store return status and return at the end of the function to resolve CI compilation error: ../src/app/clusters/ota-requestor/DefaultOTARequestorDriver.cpp:449:9: error: do not use 'else' after 'return' [readability-else-after-return,-warnings-as-errors] * In DefaultOTARequestorDriver::ScheduleQueryRetry(), add CHIP_NO_ERROR check before scheduling retry to prevent scheduling retry when provider list has been exhasuted or when max retries has been exhausted. * Update function comments for RecordErrorUpdateState() --- .../ota-requestor/DefaultOTARequestor.cpp | 95 +++++++------- .../ota-requestor/DefaultOTARequestor.h | 5 +- .../DefaultOTARequestorDriver.cpp | 124 +++++++++--------- .../ota-requestor/DefaultOTARequestorDriver.h | 8 +- .../ExtendedOTARequestorDriver.cpp | 1 - .../ota-requestor/OTARequestorDriver.h | 19 +-- src/lib/core/CHIPError.cpp | 6 + src/lib/core/CHIPError.h | 20 ++- src/lib/core/tests/TestCHIPErrorStr.cpp | 2 + 9 files changed, 144 insertions(+), 136 deletions(-) diff --git a/src/app/clusters/ota-requestor/DefaultOTARequestor.cpp b/src/app/clusters/ota-requestor/DefaultOTARequestor.cpp index 52960cbc3004fe..2fc9eb46334fff 100644 --- a/src/app/clusters/ota-requestor/DefaultOTARequestor.cpp +++ b/src/app/clusters/ota-requestor/DefaultOTARequestor.cpp @@ -153,7 +153,7 @@ void DefaultOTARequestor::OnQueryImageResponse(void * context, const QueryImageR if (err != CHIP_NO_ERROR) { ChipLogError(SoftwareUpdate, "QueryImageResponse contains invalid fields: %" CHIP_ERROR_FORMAT, err.Format()); - requestorCore->RecordErrorUpdateState(UpdateFailureState::kQuerying, err); + requestorCore->RecordErrorUpdateState(err); return; } @@ -162,7 +162,7 @@ void DefaultOTARequestor::OnQueryImageResponse(void * context, const QueryImageR if (!requestorCore->mProviderLocation.HasValue()) { ChipLogError(SoftwareUpdate, "No provider location set"); - requestorCore->RecordErrorUpdateState(UpdateFailureState::kQuerying, CHIP_ERROR_INCORRECT_STATE); + requestorCore->RecordErrorUpdateState(CHIP_ERROR_INCORRECT_STATE); return; } @@ -174,7 +174,7 @@ void DefaultOTARequestor::OnQueryImageResponse(void * context, const QueryImageR "The ImageURI provider node 0x" ChipLogFormatX64 " does not match the QueryImageResponse provider node 0x" ChipLogFormatX64, ChipLogValueX64(update.nodeId), ChipLogValueX64(requestorCore->mProviderLocation.Value().providerNodeID)); - requestorCore->RecordErrorUpdateState(UpdateFailureState::kQuerying, CHIP_ERROR_WRONG_NODE_ID); + requestorCore->RecordErrorUpdateState(CHIP_ERROR_WRONG_NODE_ID); return; } @@ -199,7 +199,7 @@ void DefaultOTARequestor::OnQueryImageResponse(void * context, const QueryImageR if (update.fileDesignator.size() > fileDesignator.size()) { ChipLogError(SoftwareUpdate, "File designator size %zu is too large to store", update.fileDesignator.size()); - requestorCore->RecordErrorUpdateState(UpdateFailureState::kQuerying, err); + requestorCore->RecordErrorUpdateState(CHIP_ERROR_BUFFER_TOO_SMALL); return; } memcpy(fileDesignator.data(), update.fileDesignator.data(), update.fileDesignator.size()); @@ -214,25 +214,35 @@ void DefaultOTARequestor::OnQueryImageResponse(void * context, const QueryImageR ChipLogDetail(SoftwareUpdate, "Available update version %" PRIu32 " is <= current version %" PRIu32 ", update ignored", update.softwareVersion, requestorCore->mCurrentVersion); - requestorCore->RecordNewUpdateState(OTAUpdateStateEnum::kIdle, OTAChangeReasonEnum::kSuccess); requestorCore->mOtaRequestorDriver->UpdateNotFound(UpdateNotFoundReason::kUpToDate, System::Clock::Seconds32(response.delayedActionTime.ValueOr(0))); + requestorCore->RecordNewUpdateState(OTAUpdateStateEnum::kIdle, OTAChangeReasonEnum::kSuccess); } break; } - case OTAQueryStatus::kBusy: - requestorCore->RecordNewUpdateState(OTAUpdateStateEnum::kDelayedOnQuery, OTAChangeReasonEnum::kDelayByProvider); - requestorCore->mOtaRequestorDriver->UpdateNotFound(UpdateNotFoundReason::kBusy, - System::Clock::Seconds32(response.delayedActionTime.ValueOr(0))); + case OTAQueryStatus::kBusy: { + CHIP_ERROR status = requestorCore->mOtaRequestorDriver->UpdateNotFound( + UpdateNotFoundReason::kBusy, System::Clock::Seconds32(response.delayedActionTime.ValueOr(0))); + if ((status == CHIP_ERROR_MAX_RETRY_EXCEEDED) || (status == CHIP_ERROR_PROVIDER_LIST_EXHAUSTED)) + { + requestorCore->RecordNewUpdateState(OTAUpdateStateEnum::kIdle, OTAChangeReasonEnum::kSuccess); + } + else + { + requestorCore->RecordNewUpdateState(OTAUpdateStateEnum::kDelayedOnQuery, OTAChangeReasonEnum::kDelayByProvider); + } + break; - case OTAQueryStatus::kNotAvailable: - requestorCore->RecordNewUpdateState(OTAUpdateStateEnum::kIdle, OTAChangeReasonEnum::kSuccess); + } + case OTAQueryStatus::kNotAvailable: { requestorCore->mOtaRequestorDriver->UpdateNotFound(UpdateNotFoundReason::kNotAvailable, System::Clock::Seconds32(response.delayedActionTime.ValueOr(0))); + requestorCore->RecordNewUpdateState(OTAUpdateStateEnum::kIdle, OTAChangeReasonEnum::kSuccess); break; + } default: - requestorCore->RecordErrorUpdateState(UpdateFailureState::kQuerying, CHIP_ERROR_BAD_REQUEST); + requestorCore->RecordErrorUpdateState(CHIP_ERROR_BAD_REQUEST); break; } } @@ -252,7 +262,7 @@ void DefaultOTARequestor::OnQueryImageFailure(void * context, CHIP_ERROR error) error = CHIP_ERROR_CONNECTION_CLOSED_UNEXPECTEDLY; } - requestorCore->RecordErrorUpdateState(UpdateFailureState::kQuerying, error); + requestorCore->RecordErrorUpdateState(error); } void DefaultOTARequestor::OnApplyUpdateResponse(void * context, const ApplyUpdateResponse::DecodableType & response) @@ -268,12 +278,12 @@ void DefaultOTARequestor::OnApplyUpdateResponse(void * context, const ApplyUpdat requestorCore->mOtaRequestorDriver->UpdateConfirmed(System::Clock::Seconds32(response.delayedActionTime)); break; case OTAApplyUpdateAction::kAwaitNextAction: - requestorCore->RecordNewUpdateState(OTAUpdateStateEnum::kDelayedOnApply, OTAChangeReasonEnum::kDelayByProvider); requestorCore->mOtaRequestorDriver->UpdateSuspended(System::Clock::Seconds32(response.delayedActionTime)); + requestorCore->RecordNewUpdateState(OTAUpdateStateEnum::kDelayedOnApply, OTAChangeReasonEnum::kDelayByProvider); break; case OTAApplyUpdateAction::kDiscontinue: - requestorCore->RecordNewUpdateState(OTAUpdateStateEnum::kIdle, OTAChangeReasonEnum::kSuccess); requestorCore->mOtaRequestorDriver->UpdateDiscontinued(); + requestorCore->RecordNewUpdateState(OTAUpdateStateEnum::kIdle, OTAChangeReasonEnum::kSuccess); break; } } @@ -284,7 +294,7 @@ void DefaultOTARequestor::OnApplyUpdateFailure(void * context, CHIP_ERROR error) VerifyOrDie(requestorCore != nullptr); ChipLogDetail(SoftwareUpdate, "ApplyUpdate failure response %" CHIP_ERROR_FORMAT, error.Format()); - requestorCore->RecordErrorUpdateState(UpdateFailureState::kApplying, error); + requestorCore->RecordErrorUpdateState(error); } void DefaultOTARequestor::OnNotifyUpdateAppliedResponse(void * context, const app::DataModel::NullObjectType & response) {} @@ -295,7 +305,7 @@ void DefaultOTARequestor::OnNotifyUpdateAppliedFailure(void * context, CHIP_ERRO VerifyOrDie(requestorCore != nullptr); ChipLogDetail(SoftwareUpdate, "NotifyUpdateApplied failure response %" CHIP_ERROR_FORMAT, error.Format()); - requestorCore->RecordErrorUpdateState(UpdateFailureState::kNotifying, error); + requestorCore->RecordErrorUpdateState(error); } void DefaultOTARequestor::Reset() @@ -339,17 +349,12 @@ void DefaultOTARequestor::HandleAnnounceOTAProvider(app::CommandHandler * comman void DefaultOTARequestor::ConnectToProvider(OnConnectedAction onConnectedAction) { - if (mServer == nullptr) - { - ChipLogError(SoftwareUpdate, "Server not set"); - RecordErrorUpdateState(UpdateFailureState::kUnknown, CHIP_ERROR_INCORRECT_STATE); - return; - } + VerifyOrDie(mServer != nullptr); if (!mProviderLocation.HasValue()) { ChipLogError(SoftwareUpdate, "Provider location not set"); - RecordErrorUpdateState(UpdateFailureState::kUnknown, CHIP_ERROR_INCORRECT_STATE); + RecordErrorUpdateState(CHIP_ERROR_INCORRECT_STATE); return; } @@ -358,7 +363,7 @@ void DefaultOTARequestor::ConnectToProvider(OnConnectedAction onConnectedAction) if (fabricInfo == nullptr) { ChipLogError(SoftwareUpdate, "Cannot find fabric"); - RecordErrorUpdateState(UpdateFailureState::kUnknown, CHIP_ERROR_INCORRECT_STATE); + RecordErrorUpdateState(CHIP_ERROR_INCORRECT_STATE); return; } @@ -373,24 +378,19 @@ void DefaultOTARequestor::ConnectToProvider(OnConnectedAction onConnectedAction) if (err != CHIP_NO_ERROR) { ChipLogError(SoftwareUpdate, "Cannot establish connection to provider: %" CHIP_ERROR_FORMAT, err.Format()); - RecordErrorUpdateState(UpdateFailureState::kUnknown, CHIP_ERROR_INCORRECT_STATE); + RecordErrorUpdateState(CHIP_ERROR_INCORRECT_STATE); return; } } void DefaultOTARequestor::DisconnectFromProvider() { - if (mServer == nullptr) - { - ChipLogError(SoftwareUpdate, "Server not set"); - RecordErrorUpdateState(UpdateFailureState::kUnknown, CHIP_ERROR_INCORRECT_STATE); - return; - } + VerifyOrDie(mServer != nullptr); if (!mProviderLocation.HasValue()) { ChipLogError(SoftwareUpdate, "Provider location not set"); - RecordErrorUpdateState(UpdateFailureState::kUnknown, CHIP_ERROR_INCORRECT_STATE); + RecordErrorUpdateState(CHIP_ERROR_INCORRECT_STATE); return; } @@ -398,7 +398,7 @@ void DefaultOTARequestor::DisconnectFromProvider() if (fabricInfo == nullptr) { ChipLogError(SoftwareUpdate, "Cannot find fabric"); - RecordErrorUpdateState(UpdateFailureState::kUnknown, CHIP_ERROR_INCORRECT_STATE); + RecordErrorUpdateState(CHIP_ERROR_INCORRECT_STATE); return; } @@ -444,7 +444,7 @@ void DefaultOTARequestor::OnConnected(void * context, OperationalDeviceProxy * d if (err != CHIP_NO_ERROR) { ChipLogError(SoftwareUpdate, "Failed to send QueryImage command: %" CHIP_ERROR_FORMAT, err.Format()); - requestorCore->RecordErrorUpdateState(UpdateFailureState::kQuerying, err); + requestorCore->RecordErrorUpdateState(err); return; } break; @@ -455,7 +455,7 @@ void DefaultOTARequestor::OnConnected(void * context, OperationalDeviceProxy * d if (err != CHIP_NO_ERROR) { ChipLogError(SoftwareUpdate, "Failed to start download: %" CHIP_ERROR_FORMAT, err.Format()); - requestorCore->RecordErrorUpdateState(UpdateFailureState::kDownloading, err); + requestorCore->RecordErrorUpdateState(err); return; } break; @@ -466,7 +466,7 @@ void DefaultOTARequestor::OnConnected(void * context, OperationalDeviceProxy * d if (err != CHIP_NO_ERROR) { ChipLogError(SoftwareUpdate, "Failed to send ApplyUpdate command: %" CHIP_ERROR_FORMAT, err.Format()); - requestorCore->RecordErrorUpdateState(UpdateFailureState::kApplying, err); + requestorCore->RecordErrorUpdateState(err); return; } break; @@ -477,7 +477,7 @@ void DefaultOTARequestor::OnConnected(void * context, OperationalDeviceProxy * d if (err != CHIP_NO_ERROR) { ChipLogError(SoftwareUpdate, "Failed to send NotifyUpdateApplied command: %" CHIP_ERROR_FORMAT, err.Format()); - requestorCore->RecordErrorUpdateState(UpdateFailureState::kNotifying, err); + requestorCore->RecordErrorUpdateState(err); return; } break; @@ -499,13 +499,13 @@ void DefaultOTARequestor::OnConnectionFailure(void * context, PeerId peerId, CHI switch (requestorCore->mOnConnectedAction) { case kQueryImage: - requestorCore->RecordErrorUpdateState(UpdateFailureState::kQuerying, error); + requestorCore->RecordErrorUpdateState(error); break; case kDownload: - requestorCore->RecordErrorUpdateState(UpdateFailureState::kDownloading, error); + requestorCore->RecordErrorUpdateState(error); break; case kApplyUpdate: - requestorCore->RecordErrorUpdateState(UpdateFailureState::kApplying, error); + requestorCore->RecordErrorUpdateState(error); break; default: break; @@ -567,7 +567,7 @@ void DefaultOTARequestor::NotifyUpdateApplied() if (DeviceLayer::ConfigurationMgr().GetProductId(productId) != CHIP_NO_ERROR) { ChipLogError(SoftwareUpdate, "Cannot get Product ID"); - RecordErrorUpdateState(UpdateFailureState::kUnknown, CHIP_ERROR_INCORRECT_STATE); + RecordErrorUpdateState(CHIP_ERROR_INCORRECT_STATE); return; } @@ -618,8 +618,7 @@ void DefaultOTARequestor::OnDownloadStateChanged(OTADownloader::State state, OTA case OTADownloader::State::kIdle: if (reason != OTAChangeReasonEnum::kSuccess) { - // TODO: Should we call some driver API to give it a chance to reschedule? - RecordErrorUpdateState(UpdateFailureState::kDownloading, CHIP_ERROR_CONNECTION_ABORTED, reason); + RecordErrorUpdateState(CHIP_ERROR_CONNECTION_ABORTED, reason); } break; @@ -669,13 +668,12 @@ void DefaultOTARequestor::RecordNewUpdateState(OTAUpdateStateEnum newState, OTAC } OtaRequestorServerOnStateTransition(mCurrentUpdateState, newState, reason, targetSoftwareVersion); - // Issue#16151 tracks re-factoring error and state transitioning handling. if ((newState == OTAUpdateStateEnum::kIdle) && (mCurrentUpdateState != OTAUpdateStateEnum::kIdle)) { IdleStateReason idleStateReason = MapErrorToIdleStateReason(error); // Inform the driver that the core logic has entered the Idle state - mOtaRequestorDriver->HandleIdleState(idleStateReason); + mOtaRequestorDriver->HandleIdleStateEnter(idleStateReason); } else if ((mCurrentUpdateState == OTAUpdateStateEnum::kIdle) && (newState != OTAUpdateStateEnum::kIdle)) { @@ -685,11 +683,8 @@ void DefaultOTARequestor::RecordNewUpdateState(OTAUpdateStateEnum newState, OTAC mCurrentUpdateState = newState; } -void DefaultOTARequestor::RecordErrorUpdateState(UpdateFailureState failureState, CHIP_ERROR error, OTAChangeReasonEnum reason) +void DefaultOTARequestor::RecordErrorUpdateState(CHIP_ERROR error, OTAChangeReasonEnum reason) { - // Inform driver of the error - mOtaRequestorDriver->HandleError(failureState, error); - // Log the DownloadError event OTAImageProcessorInterface * imageProcessor = mBdxDownloader->GetImageProcessorDelegate(); VerifyOrDie(imageProcessor != nullptr); diff --git a/src/app/clusters/ota-requestor/DefaultOTARequestor.h b/src/app/clusters/ota-requestor/DefaultOTARequestor.h index ef42419343f9b5..5d5000f22a1e91 100644 --- a/src/app/clusters/ota-requestor/DefaultOTARequestor.h +++ b/src/app/clusters/ota-requestor/DefaultOTARequestor.h @@ -196,10 +196,9 @@ class DefaultOTARequestor : public OTARequestorInterface, public BDXDownloader:: void RecordNewUpdateState(OTAUpdateStateEnum newState, OTAChangeReasonEnum reason, CHIP_ERROR error = CHIP_NO_ERROR); /** - * Record the error update state by informing the driver of the error and calling `RecordNewUpdateState` + * Record the error update state and transition to the idle state */ - void RecordErrorUpdateState(UpdateFailureState failureState, CHIP_ERROR error, - OTAChangeReasonEnum reason = OTAChangeReasonEnum::kFailure); + void RecordErrorUpdateState(CHIP_ERROR error, OTAChangeReasonEnum reason = OTAChangeReasonEnum::kFailure); /** * Generate an update token using the operational node ID in case of token lost, received in QueryImageResponse diff --git a/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.cpp b/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.cpp index 14387898409a95..7e710f7c2bd5cb 100644 --- a/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.cpp +++ b/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.cpp @@ -119,15 +119,13 @@ bool DefaultOTARequestorDriver::ProviderLocationsEqual(const ProviderLocationTyp return false; } -void DefaultOTARequestorDriver::HandleError(UpdateFailureState state, CHIP_ERROR error) {} - void DefaultOTARequestorDriver::HandleIdleStateExit() { // Start watchdog timer to monitor new Query Image session StartSelectedTimer(SelectedTimer::kWatchdogTimer); } -void DefaultOTARequestorDriver::HandleIdleState(IdleStateReason reason) +void DefaultOTARequestorDriver::HandleIdleStateEnter(IdleStateReason reason) { switch (reason) { @@ -156,62 +154,30 @@ void DefaultOTARequestorDriver::UpdateAvailable(const UpdateDescription & update delay, [](System::Layer *, void * context) { ToDriver(context)->mRequestor->DownloadUpdate(); }, this); } -void DefaultOTARequestorDriver::UpdateNotFound(UpdateNotFoundReason reason, System::Clock::Seconds32 delay) +CHIP_ERROR DefaultOTARequestorDriver::UpdateNotFound(UpdateNotFoundReason reason, System::Clock::Seconds32 delay) { - VerifyOrDie(mRequestor != nullptr); - - ProviderLocationType providerLocation; - bool willTryAnotherQuery = false; + CHIP_ERROR status = CHIP_NO_ERROR; switch (reason) { case UpdateNotFoundReason::kUpToDate: - willTryAnotherQuery = false; break; - case UpdateNotFoundReason::kBusy: - willTryAnotherQuery = true; - if (mProviderRetryCount <= kMaxBusyProviderRetryCount) + case UpdateNotFoundReason::kBusy: { + status = ScheduleQueryRetry(true); + if (status == CHIP_ERROR_MAX_RETRY_EXCEEDED) { - break; - } - ChipLogProgress(SoftwareUpdate, "Max Busy Provider retries reached. Attempting to get next Provider."); - __attribute__((fallthrough)); // fallthrough - case UpdateNotFoundReason::kNotAvailable: { - // IMPLEMENTATION CHOICE: - // This implementation schedules a query only if a different provider is available - // Note that the "listExhausted" being set to TRUE, implies that the entire list of - // defaultOTAProviders has been traversed. On bootup, the last provider is reset - // which ensures that every QueryImage call will ensure that the list is traversed from - // start to end, until an OTA is successfully completed. - bool listExhausted = false; - if ((GetNextProviderLocation(providerLocation, listExhausted) != true) || (listExhausted == true)) - { - willTryAnotherQuery = false; - } - else - { - willTryAnotherQuery = true; - mRequestor->SetCurrentProviderLocation(providerLocation); + // If max retry exceeded with current provider, try a different provider + status = ScheduleQueryRetry(false); } break; } + case UpdateNotFoundReason::kNotAvailable: { + // Schedule a query only if a different provider is available + status = ScheduleQueryRetry(false); + break; } - - if (delay < kDefaultDelayedActionTime) - { - delay = kDefaultDelayedActionTime; - } - - if (willTryAnotherQuery == true) - { - ChipLogProgress(SoftwareUpdate, "UpdateNotFound, scheduling a retry"); - ScheduleDelayedAction(delay, StartDelayTimerHandler, this); - } - else - { - ChipLogProgress(SoftwareUpdate, "UpdateNotFound, not scheduling further retries"); - StartSelectedTimer(SelectedTimer::kPeriodicQueryTimer); } + return status; } void DefaultOTARequestorDriver::UpdateDownloaded() @@ -247,9 +213,6 @@ void DefaultOTARequestorDriver::UpdateDiscontinued() // Cancel all update timers UpdateCancelled(); - - // Restart the periodic default provider timer - StartSelectedTimer(SelectedTimer::kPeriodicQueryTimer); } // Cancel all OTA update timers @@ -322,6 +285,7 @@ void DefaultOTARequestorDriver::ProcessAnnounceOTAProviders( void DefaultOTARequestorDriver::SendQueryImage() { + OTAUpdateStateEnum currentUpdateState; Optional lastUsedProvider; mRequestor->GetProviderLocation(lastUsedProvider); if (!lastUsedProvider.HasValue()) @@ -338,17 +302,17 @@ void DefaultOTARequestorDriver::SendQueryImage() return; } } - // IMPLEMENTATION CHOICE - // In this implementation explicitly triggering a query cancels any in-progress update. - UpdateCancelled(); - - // Default provider timer only runs when there is no ongoing query/update; must stop it now. - // TriggerImmediateQueryInternal() will cause the state to change from kIdle, which will start - // the Watchdog timer. (Clean this up with Issue#16151) - - mProviderRetryCount++; - DeviceLayer::SystemLayer().ScheduleLambda([this] { mRequestor->TriggerImmediateQueryInternal(); }); + currentUpdateState = mRequestor->GetCurrentUpdateState(); + if ((currentUpdateState == OTAUpdateStateEnum::kIdle) || (currentUpdateState == OTAUpdateStateEnum::kDelayedOnQuery)) + { + mProviderRetryCount++; + DeviceLayer::SystemLayer().ScheduleLambda([this] { mRequestor->TriggerImmediateQueryInternal(); }); + } + else + { + ChipLogProgress(SoftwareUpdate, "Query already in progress"); + } } void DefaultOTARequestorDriver::PeriodicQueryTimerHandler(System::Layer * systemLayer, void * appState) @@ -465,5 +429,45 @@ bool DefaultOTARequestorDriver::GetNextProviderLocation(ProviderLocationType & p return false; } +CHIP_ERROR DefaultOTARequestorDriver::ScheduleQueryRetry(bool trySameProvider) +{ + CHIP_ERROR status = CHIP_NO_ERROR; + + if (trySameProvider == false) + { + VerifyOrDie(mRequestor != nullptr); + + ProviderLocationType providerLocation; + bool listExhausted = false; + + // Note that the "listExhausted" being set to TRUE, implies that the entire list of + // defaultOTAProviders has been traversed. On bootup, the last provider is reset + // which ensures that every QueryImage call will ensure that the list is traversed from + // start to end, until an OTA is successfully completed. + if ((GetNextProviderLocation(providerLocation, listExhausted) != true) || (listExhausted == true)) + { + status = CHIP_ERROR_PROVIDER_LIST_EXHAUSTED; + } + else + { + mRequestor->SetCurrentProviderLocation(providerLocation); + } + } + + if (mProviderRetryCount > kMaxBusyProviderRetryCount) + { + ChipLogProgress(SoftwareUpdate, "Max retry of %u exceeded. Will not retry", kMaxBusyProviderRetryCount); + status = CHIP_ERROR_MAX_RETRY_EXCEEDED; + } + + if (status == CHIP_NO_ERROR) + { + ChipLogProgress(SoftwareUpdate, "Scheduling a retry"); + ScheduleDelayedAction(kDefaultDelayedActionTime, StartDelayTimerHandler, this); + } + + return status; +} + } // namespace DeviceLayer } // namespace chip diff --git a/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.h b/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.h index f0948dd3d22a96..24df0922156433 100644 --- a/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.h +++ b/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.h @@ -65,11 +65,11 @@ class DefaultOTARequestorDriver : public OTARequestorDriver bool CanConsent() override; uint16_t GetMaxDownloadBlockSize() override; void SetMaxDownloadBlockSize(uint16_t maxDownloadBlockSize) override; - void HandleError(UpdateFailureState state, CHIP_ERROR error) override; + void HandleIdleStateExit() override; - void HandleIdleState(IdleStateReason reason) override; + void HandleIdleStateEnter(IdleStateReason reason) override; void UpdateAvailable(const UpdateDescription & update, System::Clock::Seconds32 delay) override; - void UpdateNotFound(UpdateNotFoundReason reason, System::Clock::Seconds32 delay) override; + CHIP_ERROR UpdateNotFound(UpdateNotFoundReason reason, System::Clock::Seconds32 delay) override; void UpdateDownloaded() override; void UpdateConfirmed(System::Clock::Seconds32 delay) override; void UpdateSuspended(System::Clock::Seconds32 delay) override; @@ -92,6 +92,8 @@ class DefaultOTARequestorDriver : public OTARequestorDriver void ScheduleDelayedAction(System::Clock::Seconds32 delay, System::TimerCompleteCallback action, void * aAppState); void CancelDelayedAction(System::TimerCompleteCallback action, void * aAppState); bool ProviderLocationsEqual(const ProviderLocationType & a, const ProviderLocationType & b); + // Return value of CHIP_NO_ERROR indicates a query retry has been successfully scheduled. + CHIP_ERROR ScheduleQueryRetry(bool trySameProvider); OTARequestorInterface * mRequestor = nullptr; OTAImageProcessorInterface * mImageProcessor = nullptr; diff --git a/src/app/clusters/ota-requestor/ExtendedOTARequestorDriver.cpp b/src/app/clusters/ota-requestor/ExtendedOTARequestorDriver.cpp index 1afe2edec6c6a2..07f96e9e00041e 100644 --- a/src/app/clusters/ota-requestor/ExtendedOTARequestorDriver.cpp +++ b/src/app/clusters/ota-requestor/ExtendedOTARequestorDriver.cpp @@ -49,7 +49,6 @@ void ExtendedOTARequestorDriver::UpdateAvailable(const UpdateDescription & updat if (err != CHIP_NO_ERROR) { ChipLogError(SoftwareUpdate, "Failed to get user consent subject"); - HandleError(UpdateFailureState::kDelayedOnUserConsent, err); return; } diff --git a/src/app/clusters/ota-requestor/OTARequestorDriver.h b/src/app/clusters/ota-requestor/OTARequestorDriver.h index d3856d17543e12..aee6f8fc137914 100644 --- a/src/app/clusters/ota-requestor/OTARequestorDriver.h +++ b/src/app/clusters/ota-requestor/OTARequestorDriver.h @@ -42,18 +42,6 @@ struct UpdateDescription ByteSpan metadataForRequestor; }; -enum class UpdateFailureState -{ - kUnknown, - kIdle, - kQuerying, - kDownloading, - kApplying, - kNotifying, - kAwaitingNextAction, - kDelayedOnUserConsent, -}; - enum class UpdateNotFoundReason { kBusy, @@ -94,20 +82,17 @@ class OTARequestorDriver /// Set maximum supported download block size virtual void SetMaxDownloadBlockSize(uint16_t maxDownloadBlockSize) = 0; - /// Called when an error occurs at any OTA requestor operation - virtual void HandleError(UpdateFailureState state, CHIP_ERROR error) = 0; - /// Called when OTA Requestor has exited the Idle state for which the driver may need to take various actions virtual void HandleIdleStateExit() = 0; // Called when the OTA Requestor has entered the Idle state for which the driver may need to take various actions - virtual void HandleIdleState(IdleStateReason reason) = 0; + virtual void HandleIdleStateEnter(IdleStateReason reason) = 0; /// Called when the latest query found a software update virtual void UpdateAvailable(const UpdateDescription & update, System::Clock::Seconds32 delay) = 0; /// Called when the latest query did not find any software update - virtual void UpdateNotFound(UpdateNotFoundReason reason, System::Clock::Seconds32 delay) = 0; + virtual CHIP_ERROR UpdateNotFound(UpdateNotFoundReason reason, System::Clock::Seconds32 delay) = 0; /// Called when the download of a new software image has finished virtual void UpdateDownloaded() = 0; diff --git a/src/lib/core/CHIPError.cpp b/src/lib/core/CHIPError.cpp index 95476499a175a8..b942db6f52a29e 100644 --- a/src/lib/core/CHIPError.cpp +++ b/src/lib/core/CHIPError.cpp @@ -677,6 +677,12 @@ bool FormatCHIPError(char * buf, uint16_t bufSize, CHIP_ERROR err) case CHIP_ERROR_BUSY.AsInteger(): desc = "The Resource is busy and cannot process the request"; break; + case CHIP_ERROR_MAX_RETRY_EXCEEDED.AsInteger(): + desc = "The maximum retry limit has been exceeded"; + break; + case CHIP_ERROR_PROVIDER_LIST_EXHAUSTED.AsInteger(): + desc = "The provider list has been exhausted"; + break; } #endif // !CHIP_CONFIG_SHORT_ERROR_STR diff --git a/src/lib/core/CHIPError.h b/src/lib/core/CHIPError.h index 82bab653298940..b952609ad46ad3 100644 --- a/src/lib/core/CHIPError.h +++ b/src/lib/core/CHIPError.h @@ -2372,6 +2372,22 @@ using CHIP_ERROR = ::chip::ChipError; */ #define CHIP_ERROR_BUSY CHIP_CORE_ERROR(0xdb) +/** + * @def CHIP_ERROR_MAX_RETRY_EXCEEDED + * + * @brief + * The maximum retry limit has been exceeded. + */ +#define CHIP_ERROR_MAX_RETRY_EXCEEDED CHIP_CORE_ERROR(0xdc) + +/** + * @def CHIP_ERROR_PROVIDER_LIST_EXHAUSTED + * + * @brief + * The provider list has been exhausted. + */ +#define CHIP_ERROR_PROVIDER_LIST_EXHAUSTED CHIP_CORE_ERROR(0xdd) + /** * @} */ @@ -2379,8 +2395,8 @@ using CHIP_ERROR = ::chip::ChipError; // clang-format on // !!!!! IMPORTANT !!!!! If you add new CHIP errors, please update the translation -// of error codes to strings in CHIPError.cpp, and add them to unittest -// in test-apps/TestErrorStr.cpp +// of error codes to strings in CHIPError.cpp, and add them to kTestElements[] +// in core/tests/TestCHIPErrorStr.cpp namespace chip { diff --git a/src/lib/core/tests/TestCHIPErrorStr.cpp b/src/lib/core/tests/TestCHIPErrorStr.cpp index 3442f0f33dc2be..7a36e02f9488dc 100644 --- a/src/lib/core/tests/TestCHIPErrorStr.cpp +++ b/src/lib/core/tests/TestCHIPErrorStr.cpp @@ -218,6 +218,8 @@ static const CHIP_ERROR kTestElements[] = CHIP_ERROR_IM_MALFORMED_STATUS_CODE, CHIP_ERROR_PEER_NODE_NOT_FOUND, CHIP_ERROR_IM_STATUS_CODE_RECEIVED, + CHIP_ERROR_MAX_RETRY_EXCEEDED, + CHIP_ERROR_PROVIDER_LIST_EXHAUSTED, }; // clang-format on