From 3f2ddd8e8ba96d0d1d0c5f7376168fa4e2c5bf7a Mon Sep 17 00:00:00 2001 From: Alexander Sehr Date: Thu, 18 Jul 2024 17:05:00 +0200 Subject: [PATCH] fix: Added retry to post-removal logic (#2705) ## Description I noticed a few times now that especially the KeyVault post-removal command sometimes fails with a generic error (sending a request). For the implementation today, that unfortunately means that the removal script throws an exception and stops - leaving a lot of resources still in Azure. Even more unfortunate is the fact that the purge actually worked in those cases and I can only assume that the cmdlet has an internal issue checking the latest state of the purge. To help the matter in a generic way (that it, address the same for other post-removal cases), I added a retry logic similar to our default resource removal. All our post-removal script blocks are written in a way that they can cope with a retry as they always check first if they can find the soft-deleted resource before doing anything. Example [run](https://github.com/Azure/bicep-registry-modules/actions/runs/9886800221/job/27331338137#step:4:885) ``` VERBOSE: [-] Removing resource [dep-***-law-kvvwaf] of type [Microsoft.OperationalInsights/workspaces] VERBOSE: [*] Purging resource [dep-***-law-kvvwaf] of type [Microsoft.OperationalInsights/workspaces] VERBOSE: [-] Removing resource [***kvvwaf002] of type [Microsoft.KeyVault/vaults] VERBOSE: [*] Purging resource [***kvvwaf002] of type [Microsoft.KeyVault/vaults] WARNING: [!] Post-removal operation failed. Reason: [An error occurred while sending the request.]. Retry [1/3] VERBOSE: [-] Removing resource [dep-***-msi-kvvwaf] of type [Microsoft.ManagedIdentity/userAssignedIdentities] ``` ## Pipeline Reference | Pipeline | | -------- | | [![avm.res.key-vault.vault](https://github.com/Azure/bicep-registry-modules/actions/workflows/avm.res.key-vault.vault.yml/badge.svg?branch=users%2Falsehr%2FkvltPurgeRobustness&event=workflow_dispatch)](https://github.com/Azure/bicep-registry-modules/actions/workflows/avm.res.key-vault.vault.yml) | ## Type of Change - [x] Update to CI Environment or utilities (Non-module affecting changes) - [ ] Azure Verified Module updates: - [ ] Bugfix containing backwards-compatible bug fixes, and I have NOT bumped the MAJOR or MINOR version in `version.json`: - [ ] Someone has opened a bug report issue, and I have included "Closes #{bug_report_issue_number}" in the PR description. - [ ] The bug was found by the module author, and no one has opened an issue to report it yet. - [ ] Feature update backwards compatible feature updates, and I have bumped the MINOR version in `version.json`. - [ ] Breaking changes and I have bumped the MAJOR version in `version.json`. - [ ] Update to documentation --- .../helper/Invoke-ResourcePostRemoval.ps1 | 287 +++++++++--------- 1 file changed, 151 insertions(+), 136 deletions(-) diff --git a/avm/utilities/pipelines/e2eValidation/resourceRemoval/helper/Invoke-ResourcePostRemoval.ps1 b/avm/utilities/pipelines/e2eValidation/resourceRemoval/helper/Invoke-ResourcePostRemoval.ps1 index 25b068284f..d78e5860cd 100644 --- a/avm/utilities/pipelines/e2eValidation/resourceRemoval/helper/Invoke-ResourcePostRemoval.ps1 +++ b/avm/utilities/pipelines/e2eValidation/resourceRemoval/helper/Invoke-ResourcePostRemoval.ps1 @@ -11,6 +11,9 @@ Mandatory. The resourceID of the resource to remove .PARAMETER Type Mandatory. The type of the resource to remove +.PARAMETER PostRemovalRetryLimit +Optional. Specify the number of times the script should try to repeat the post-removal operation in case of a failure. + .EXAMPLE Invoke-ResourcePostRemoval -Type 'Microsoft.KeyVault/vaults' -ResourceId '/subscriptions/.../resourceGroups/validation-rg/providers/Microsoft.KeyVault/vaults/myVault' @@ -24,157 +27,169 @@ function Invoke-ResourcePostRemoval { [string] $ResourceId, [Parameter(Mandatory = $true)] - [string] $Type - ) + [string] $Type, - switch ($Type) { - 'Microsoft.AppConfiguration/configurationStores' { - $subscriptionId = $ResourceId.Split('/')[2] - $resourceName = Split-Path $ResourceId -Leaf + [Parameter(Mandatory = $false)] + [int] $PostRemovalRetryLimit = 3 + ) - # Fetch service in soft-delete - $getPath = '/subscriptions/{0}/providers/Microsoft.AppConfiguration/deletedConfigurationStores?api-version=2021-10-01-preview' -f $subscriptionId - $getRequestInputObject = @{ - Method = 'GET' - Path = $getPath - } - $softDeletedConfigurationStore = ((Invoke-AzRestMethod @getRequestInputObject).Content | ConvertFrom-Json).value | Where-Object { $_.properties.configurationStoreId -eq $ResourceId } - - if ($softDeletedConfigurationStore) { - # Purge service - $purgePath = '/subscriptions/{0}/providers/Microsoft.AppConfiguration/locations/{1}/deletedConfigurationStores/{2}/purge?api-version=2021-10-01-preview' -f $subscriptionId, $softDeletedConfigurationStore.properties.location, $resourceName - $purgeRequestInputObject = @{ - Method = 'POST' - Path = $purgePath - } - Write-Verbose ('[*] Purging resource [{0}] of type [{1}]' -f $resourceName, $Type) -Verbose - if ($PSCmdlet.ShouldProcess(('App Configuration Store with ID [{0}]' -f $softDeletedConfigurationStore.properties.configurationStoreId), 'Purge')) { - $response = Invoke-AzRestMethod @purgeRequestInputObject - if ($response.StatusCode -ne 200) { - throw ('Purge of resource [{0}] failed with error code [{1}]' -f $ResourceId, $response.StatusCode) + $postRemovalRetryCount = 1 + do { + try { + switch ($Type) { + 'Microsoft.AppConfiguration/configurationStores' { + $subscriptionId = $ResourceId.Split('/')[2] + $resourceName = Split-Path $ResourceId -Leaf + + # Fetch service in soft-delete + $getPath = '/subscriptions/{0}/providers/Microsoft.AppConfiguration/deletedConfigurationStores?api-version=2021-10-01-preview' -f $subscriptionId + $getRequestInputObject = @{ + Method = 'GET' + Path = $getPath } - } - } - break - } - 'Microsoft.KeyVault/vaults' { - $resourceName = Split-Path $ResourceId -Leaf - - $matchingKeyVault = Get-AzKeyVault -InRemovedState | Where-Object { $_.resourceId -eq $ResourceId } - if ($matchingKeyVault -and -not $matchingKeyVault.EnablePurgeProtection) { - Write-Verbose ('[*] Purging resource [{0}] of type [{1}]' -f $resourceName, $Type) -Verbose - if ($PSCmdlet.ShouldProcess(('Key Vault with ID [{0}]' -f $matchingKeyVault.Id), 'Purge')) { - try { - $null = Remove-AzKeyVault -ResourceId $matchingKeyVault.Id -InRemovedState -Force -Location $matchingKeyVault.Location -ErrorAction 'Stop' - } catch { - if ($_.Exception.Message -like '*DeletedVaultPurge*') { - Write-Warning ('Purge protection for key vault [{0}] enabled. Skipping. Scheduled purge date is [{1}]' -f $resourceName, $matchingKeyVault.ScheduledPurgeDate) - } else { - throw $_ + $softDeletedConfigurationStore = ((Invoke-AzRestMethod @getRequestInputObject).Content | ConvertFrom-Json).value | Where-Object { $_.properties.configurationStoreId -eq $ResourceId } + + if ($softDeletedConfigurationStore) { + # Purge service + $purgePath = '/subscriptions/{0}/providers/Microsoft.AppConfiguration/locations/{1}/deletedConfigurationStores/{2}/purge?api-version=2021-10-01-preview' -f $subscriptionId, $softDeletedConfigurationStore.properties.location, $resourceName + $purgeRequestInputObject = @{ + Method = 'POST' + Path = $purgePath + } + Write-Verbose ('[*] Purging resource [{0}] of type [{1}]' -f $resourceName, $Type) -Verbose + if ($PSCmdlet.ShouldProcess(('App Configuration Store with ID [{0}]' -f $softDeletedConfigurationStore.properties.configurationStoreId), 'Purge')) { + $response = Invoke-AzRestMethod @purgeRequestInputObject + if ($response.StatusCode -ne 200) { + throw ('Purge of resource [{0}] failed with error code [{1}]' -f $ResourceId, $response.StatusCode) + } } } + break } - } - break - } - 'Microsoft.CognitiveServices/accounts' { - $resourceGroupName = $ResourceId.Split('/')[4] - $resourceName = Split-Path $ResourceId -Leaf - - $matchingAccount = Get-AzCognitiveServicesAccount -InRemovedState | Where-Object { $_.AccountName -eq $resourceName } - if ($matchingAccount) { - Write-Verbose ('[*] Purging resource [{0}] of type [{1}]' -f $resourceName, $Type) -Verbose - if ($PSCmdlet.ShouldProcess(('Cognitive services account with ID [{0}]' -f $matchingAccount.Id), 'Purge')) { - $null = Remove-AzCognitiveServicesAccount -InRemovedState -Force -Location $matchingAccount.Location -ResourceGroupName $resourceGroupName -Name $matchingAccount.AccountName - } - } - break - } - 'Microsoft.ApiManagement/service' { - $subscriptionId = $ResourceId.Split('/')[2] - $resourceName = Split-Path $ResourceId -Leaf - - # Fetch service in soft-delete - $getPath = '/subscriptions/{0}/providers/Microsoft.ApiManagement/deletedservices?api-version=2021-08-01' -f $subscriptionId - $getRequestInputObject = @{ - Method = 'GET' - Path = $getPath - } - $softDeletedService = ((Invoke-AzRestMethod @getRequestInputObject).Content | ConvertFrom-Json).value | Where-Object { $_.properties.serviceId -eq $ResourceId } - - if ($softDeletedService) { - # Purge service - $purgePath = '/subscriptions/{0}/providers/Microsoft.ApiManagement/locations/{1}/deletedservices/{2}?api-version=2020-06-01-preview' -f $subscriptionId, $softDeletedService.location, $resourceName - $purgeRequestInputObject = @{ - Method = 'DELETE' - Path = $purgePath + 'Microsoft.KeyVault/vaults' { + $resourceName = Split-Path $ResourceId -Leaf + + $matchingKeyVault = Get-AzKeyVault -InRemovedState | Where-Object { $_.resourceId -eq $ResourceId } + if ($matchingKeyVault -and -not $matchingKeyVault.EnablePurgeProtection) { + Write-Verbose ('[*] Purging resource [{0}] of type [{1}]' -f $resourceName, $Type) -Verbose + if ($PSCmdlet.ShouldProcess(('Key Vault with ID [{0}]' -f $matchingKeyVault.Id), 'Purge')) { + try { + $null = Remove-AzKeyVault -ResourceId $matchingKeyVault.Id -InRemovedState -Force -Location $matchingKeyVault.Location -ErrorAction 'Stop' + } catch { + if ($_.Exception.Message -like '*DeletedVaultPurge*') { + Write-Warning ('Purge protection for key vault [{0}] enabled. Skipping. Scheduled purge date is [{1}]' -f $resourceName, $matchingKeyVault.ScheduledPurgeDate) + } else { + throw $_ + } + } + } + } + break } - Write-Verbose ('[*] Purging resource [{0}] of type [{1}]' -f $resourceName, $Type) -Verbose - if ($PSCmdlet.ShouldProcess(('API management service with ID [{0}]' -f $softDeletedService.properties.serviceId), 'Purge')) { - $null = Invoke-AzRestMethod @purgeRequestInputObject + 'Microsoft.CognitiveServices/accounts' { + $resourceGroupName = $ResourceId.Split('/')[4] + $resourceName = Split-Path $ResourceId -Leaf + + $matchingAccount = Get-AzCognitiveServicesAccount -InRemovedState | Where-Object { $_.AccountName -eq $resourceName } + if ($matchingAccount) { + Write-Verbose ('[*] Purging resource [{0}] of type [{1}]' -f $resourceName, $Type) -Verbose + if ($PSCmdlet.ShouldProcess(('Cognitive services account with ID [{0}]' -f $matchingAccount.Id), 'Purge')) { + $null = Remove-AzCognitiveServicesAccount -InRemovedState -Force -Location $matchingAccount.Location -ResourceGroupName $resourceGroupName -Name $matchingAccount.AccountName + } + } + break } - } - break - } - 'Microsoft.RecoveryServices/vaults/backupFabrics/protectionContainers/protectedItems' { - # Remove protected VM - # Required if e.g. a VM was listed in an RSV and only that VM is removed - $vaultId = $ResourceId.split('/backupFabrics/')[0] - $resourceName = Split-Path $ResourceId -Leaf - $softDeleteStatus = (Get-AzRecoveryServicesVaultProperty -VaultId $vaultId).SoftDeleteFeatureState - if ($softDeleteStatus -ne 'Disabled') { - if ($PSCmdlet.ShouldProcess(('Soft-delete on RSV [{0}]' -f $vaultId), 'Set')) { - $null = Set-AzRecoveryServicesVaultProperty -VaultId $vaultId -SoftDeleteFeatureState 'Disable' + 'Microsoft.ApiManagement/service' { + $subscriptionId = $ResourceId.Split('/')[2] + $resourceName = Split-Path $ResourceId -Leaf + + # Fetch service in soft-delete + $getPath = '/subscriptions/{0}/providers/Microsoft.ApiManagement/deletedservices?api-version=2021-08-01' -f $subscriptionId + $getRequestInputObject = @{ + Method = 'GET' + Path = $getPath + } + $softDeletedService = ((Invoke-AzRestMethod @getRequestInputObject).Content | ConvertFrom-Json).value | Where-Object { $_.properties.serviceId -eq $ResourceId } + + if ($softDeletedService) { + # Purge service + $purgePath = '/subscriptions/{0}/providers/Microsoft.ApiManagement/locations/{1}/deletedservices/{2}?api-version=2020-06-01-preview' -f $subscriptionId, $softDeletedService.location, $resourceName + $purgeRequestInputObject = @{ + Method = 'DELETE' + Path = $purgePath + } + Write-Verbose ('[*] Purging resource [{0}] of type [{1}]' -f $resourceName, $Type) -Verbose + if ($PSCmdlet.ShouldProcess(('API management service with ID [{0}]' -f $softDeletedService.properties.serviceId), 'Purge')) { + $null = Invoke-AzRestMethod @purgeRequestInputObject + } + } + break } - } - - $backupItemInputObject = @{ - BackupManagementType = 'AzureVM' - WorkloadType = 'AzureVM' - VaultId = $vaultId - Name = $resourceName - } - if ($backupItem = Get-AzRecoveryServicesBackupItem @backupItemInputObject -ErrorAction 'SilentlyContinue') { - Write-Verbose (' [-] Removing Backup item [{0}] from RSV [{1}]' -f $backupItem.Name, $vaultId) -Verbose + 'Microsoft.RecoveryServices/vaults/backupFabrics/protectionContainers/protectedItems' { + # Remove protected VM + # Required if e.g. a VM was listed in an RSV and only that VM is removed + $vaultId = $ResourceId.split('/backupFabrics/')[0] + $resourceName = Split-Path $ResourceId -Leaf + $softDeleteStatus = (Get-AzRecoveryServicesVaultProperty -VaultId $vaultId).SoftDeleteFeatureState + if ($softDeleteStatus -ne 'Disabled') { + if ($PSCmdlet.ShouldProcess(('Soft-delete on RSV [{0}]' -f $vaultId), 'Set')) { + $null = Set-AzRecoveryServicesVaultProperty -VaultId $vaultId -SoftDeleteFeatureState 'Disable' + } + } - if ($backupItem.DeleteState -eq 'ToBeDeleted') { - if ($PSCmdlet.ShouldProcess('Soft-deleted backup data removal', 'Undo')) { - $null = Undo-AzRecoveryServicesBackupItemDeletion -Item $backupItem -VaultId $vaultId -Force + $backupItemInputObject = @{ + BackupManagementType = 'AzureVM' + WorkloadType = 'AzureVM' + VaultId = $vaultId + Name = $resourceName } - } + if ($backupItem = Get-AzRecoveryServicesBackupItem @backupItemInputObject -ErrorAction 'SilentlyContinue') { + Write-Verbose (' [-] Removing Backup item [{0}] from RSV [{1}]' -f $backupItem.Name, $vaultId) -Verbose - if ($PSCmdlet.ShouldProcess(('Backup item [{0}] from RSV [{1}]' -f $backupItem.Name, $vaultId), 'Remove')) { - $null = Disable-AzRecoveryServicesBackupProtection -Item $backupItem -VaultId $vaultId -RemoveRecoveryPoints -Force - } - } + if ($backupItem.DeleteState -eq 'ToBeDeleted') { + if ($PSCmdlet.ShouldProcess('Soft-deleted backup data removal', 'Undo')) { + $null = Undo-AzRecoveryServicesBackupItemDeletion -Item $backupItem -VaultId $vaultId -Force + } + } - # Undo a potential soft delete state change - $null = Set-AzRecoveryServicesVaultProperty -VaultId $vaultId -SoftDeleteFeatureState $softDeleteStatus.TrimEnd('d') - break - } - 'Microsoft.Databricks/workspaces' { - $resourceGroupName = $ResourceId.Split('/')[4] - $resourceName = Split-Path $ResourceId -Leaf - # If the `managedResourceGroupResourceId` parameter was set during deployment, we should look for that resource group and remove it as it is not automatically removed - # NOTE: This requires that the provided value uses the prefix `rg-` and suffix '-managed' - $managedResourceGroupName = "rg-$resourceGroupName-managed" - if (Get-AzResourceGroup -Name $managedResourceGroupName -ErrorAction 'SilentlyContinue') { - Write-Verbose ('[*] Removing managed resource group [{0}] of workspace [{1}]' -f $managedResourceGroupName, $resourceName) -Verbose - if ($PSCmdlet.ShouldProcess(('Managed resource group [{0}]' -f $managedResourceGroupName), 'Remove')) { - $null = Remove-AzResourceGroup -Name $managedResourceGroupName -Force + if ($PSCmdlet.ShouldProcess(('Backup item [{0}] from RSV [{1}]' -f $backupItem.Name, $vaultId), 'Remove')) { + $null = Disable-AzRecoveryServicesBackupProtection -Item $backupItem -VaultId $vaultId -RemoveRecoveryPoints -Force + } + } + + # Undo a potential soft delete state change + $null = Set-AzRecoveryServicesVaultProperty -VaultId $vaultId -SoftDeleteFeatureState $softDeleteStatus.TrimEnd('d') + break } - } - # If the `managedResourceGroupResourceId` parameter was NOT set during deployment, we should look for a resource group with the default name and remove it as it is not automatically removed - # NOTE: This requires that the default value uses the prefix 'rg-' - $defaultManagedResourceGroupName = "rg-$resourceName-managed" - if (Get-AzResourceGroup -Name $defaultManagedResourceGroupName -ErrorAction 'SilentlyContinue') { - Write-Verbose ('[*] Removing managed resource group [{0}] of workspace [{1}]' -f $defaultManagedResourceGroupName, $resourceName) -Verbose - if ($PSCmdlet.ShouldProcess(('Managed resource group [{0}]' -f $defaultManagedResourceGroupName), 'Remove')) { - $null = Remove-AzResourceGroup -Name $defaultManagedResourceGroupName -Force + 'Microsoft.Databricks/workspaces' { + $resourceGroupName = $ResourceId.Split('/')[4] + $resourceName = Split-Path $ResourceId -Leaf + # If the `managedResourceGroupResourceId` parameter was set during deployment, we should look for that resource group and remove it as it is not automatically removed + # NOTE: This requires that the provided value uses the prefix `rg-` and suffix '-managed' + $managedResourceGroupName = "rg-$resourceGroupName-managed" + if (Get-AzResourceGroup -Name $managedResourceGroupName -ErrorAction 'SilentlyContinue') { + Write-Verbose ('[*] Removing managed resource group [{0}] of workspace [{1}]' -f $managedResourceGroupName, $resourceName) -Verbose + if ($PSCmdlet.ShouldProcess(('Managed resource group [{0}]' -f $managedResourceGroupName), 'Remove')) { + $null = Remove-AzResourceGroup -Name $managedResourceGroupName -Force + } + } + # If the `managedResourceGroupResourceId` parameter was NOT set during deployment, we should look for a resource group with the default name and remove it as it is not automatically removed + # NOTE: This requires that the default value uses the prefix 'rg-' + $defaultManagedResourceGroupName = "rg-$resourceName-managed" + if (Get-AzResourceGroup -Name $defaultManagedResourceGroupName -ErrorAction 'SilentlyContinue') { + Write-Verbose ('[*] Removing managed resource group [{0}] of workspace [{1}]' -f $defaultManagedResourceGroupName, $resourceName) -Verbose + if ($PSCmdlet.ShouldProcess(('Managed resource group [{0}]' -f $defaultManagedResourceGroupName), 'Remove')) { + $null = Remove-AzResourceGroup -Name $defaultManagedResourceGroupName -Force + } + } + break } + ### CODE LOCATION: Add custom post-removal operation here } - break + break # Post-removal was successful + } catch { + Write-Warning ('[!] Post-removal operation failed. Reason: [{0}]. Retry [{1}/{2}]' -f $_.Exception.Message, $removalRetryCount, $PostRemovalRetryLimit) + $removalRetryCount++ } - ### CODE LOCATION: Add custom post-removal operation here - } + } while ($postRemovalRetryCount -le $PostRemovalRetryLimit) }