diff --git a/acr-aks-stamp/modules/acr.bicep b/acr-aks-stamp/modules/acr.bicep new file mode 100644 index 00000000..0c5f0ac9 --- /dev/null +++ b/acr-aks-stamp/modules/acr.bicep @@ -0,0 +1,136 @@ +param vnetId string +param privateLinkSubnetId string +param location string +param geoRedundancyLocation string +param acrName string +param logAnalyticsWorkspaceName string + +var acrPrivateDnsZoneName = 'privatelink.azurecr.io' + +resource logAnalyticsWorkspace 'Microsoft.OperationalInsights/workspaces@2020-10-01' existing = { + name: logAnalyticsWorkspaceName +} + +resource acrPrivateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = { + name: acrPrivateDnsZoneName + location: 'global' + properties: {} +} + +resource acrPrivateDnsZonesNameVNetLink 'Microsoft.Network/privateDnsZones/virtualNetworkLinks@2020-06-01' = { + parent: acrPrivateDnsZone + name: 'to_aksvnet' + location: 'global' + properties: { + virtualNetwork: { + id: vnetId + } + registrationEnabled: false + } +} + +resource acr 'Microsoft.ContainerRegistry/registries@2020-11-01-preview' = { + name: acrName + location: location + sku: { + name: 'Premium' + } + properties: { + adminUserEnabled: false + networkRuleSet: { + defaultAction: 'Deny' + virtualNetworkRules: [] + ipRules: [] + } + policies: { + quarantinePolicy: { + status: 'disabled' + } + trustPolicy: { + type: 'Notary' + status: 'disabled' + } + retentionPolicy: { + days: 15 + status: 'enabled' + } + } + publicNetworkAccess: 'Disabled' + encryption: { + status: 'disabled' + } + dataEndpointEnabled: true + networkRuleBypassOptions: 'AzureServices' + zoneRedundancy: 'Disabled' + } +} + +resource acrReplication 'Microsoft.ContainerRegistry/registries/replications@2020-11-01-preview' = { + parent: acr + name: geoRedundancyLocation + location: geoRedundancyLocation +} + +resource acrDiagnostics 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { + name: 'Microsoft.Insights' + scope: acr + properties: { + workspaceId: 
logAnalyticsWorkspace.id + metrics: [ + { + timeGrain: 'PT1M' + category: 'AllMetrics' + enabled: true + } + ] + logs: [ + { + category: 'ContainerRegistryRepositoryEvents' + enabled: true + } + { + category: 'ContainerRegistryLoginEvents' + enabled: true + } + ] + } +} + +resource arcPrivateLink 'Microsoft.Network/privateEndpoints@2020-11-01' = { + name: 'acr_to_aksvnet' + location: location + properties: { + subnet: { + id: privateLinkSubnetId + } + privateLinkServiceConnections: [ + { + name: 'nodepools' + properties: { + privateLinkServiceId: acr.id + groupIds: [ + 'registry' + ] + } + } + ] + } + dependsOn: [ + acrReplication + ] +} + +resource acrPrivateLinkDnsZone 'Microsoft.Network/privateEndpoints/privateDnsZoneGroups@2020-11-01' = { + parent: arcPrivateLink + name: 'default' + properties: { + privateDnsZoneConfigs: [ + { + name: 'privatelink-azurecr-io' + properties: { + privateDnsZoneId: acrPrivateDnsZone.id + } + } + ] + } +} diff --git a/acr-aks-stamp/modules/aks.bicep b/acr-aks-stamp/modules/aks.bicep new file mode 100644 index 00000000..70d210fb --- /dev/null +++ b/acr-aks-stamp/modules/aks.bicep @@ -0,0 +1,427 @@ +param aksClusterName string +param location string +param aksControlPlaneIdentityName string +param aksNodeResourceGroup string +param aksIngressDomainName string +param aksIngressIdentityName string +param aksIngressLoadBalancerIp string +param aksAuthorizedIPRanges string +param appSubDomainName string +param acrName string +param vnetId string +param aksSubnetId string +param logAnalyticsWorkspaceName string +param useAzureRBAC bool +param clusterAdminAadGroupObjectId string +param clusterUserAadGroupObjectId string +param businessUnitTag string +param applicationIdentifierTag string +param fluxSettings object + +var monitoringMetricsPublisherRole = '${subscription().id}/providers/Microsoft.Authorization/roleDefinitions/3913510d-42f4-4e42-8a64-420c390055eb' +var acrPullRole = 
'${subscription().id}/providers/Microsoft.Authorization/roleDefinitions/7f951dda-4ed3-4680-a7ca-43fe172d538d' +var containerInsightsSolutionName = 'ContainerInsights(${logAnalyticsWorkspaceName})' + +resource logAnalyticsWorkspace 'Microsoft.OperationalInsights/workspaces@2020-10-01' existing = { + name: logAnalyticsWorkspaceName +} + +resource acr 'Microsoft.ContainerRegistry/registries@2020-11-01-preview' existing = { + name: acrName +} + +resource aksAcrPullRoleAssignment 'Microsoft.Authorization/roleAssignments@2020-08-01-preview' = { + name: guid('${aks.id}-${acrName}-${acrPullRole}') + scope: acr + properties: { + principalId: reference(resourceId('Microsoft.ContainerService/managedClusters', aksClusterName), '2020-12-01').identityProfile.kubeletidentity.objectId + roleDefinitionId: acrPullRole + principalType: 'ServicePrincipal' + } +} + +resource aks 'Microsoft.ContainerService/managedClusters@2021-08-01' = { + name: aksClusterName + location: location + tags: { + 'Business unit': businessUnitTag + 'Application identifier': applicationIdentifierTag + } + identity: { + type: 'UserAssigned' + userAssignedIdentities: { + '${resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', aksControlPlaneIdentityName)}': { + } + } + } + sku: { + name: 'Basic' + tier: 'Paid' + } + properties: { + nodeResourceGroup: aksNodeResourceGroup + enableRBAC: true + enablePodSecurityPolicy: false + publicNetworkAccess: 'Enabled' + kubernetesVersion: '1.22.4' + dnsPrefix: '${aksClusterName}-dns' + agentPoolProfiles: [ + { + name: 'npsystem' + count: 3 + vmSize: 'Standard_DS2_v2' + osDiskSizeGB: 80 + osDiskType: 'Ephemeral' + osType: 'Linux' + minCount: 3 + maxCount: 4 + vnetSubnetID: aksSubnetId + enableAutoScaling: true + type: 'VirtualMachineScaleSets' + mode: 'System' + scaleSetPriority: 'Regular' + scaleSetEvictionPolicy: 'Delete' + orchestratorVersion: '1.22.4' + enableNodePublicIP: false + maxPods: 30 + availabilityZones: [ + '1' + '2' + '3' + ] + upgradeSettings: { + 
maxSurge: '33%' + } + nodeTaints: [ + 'CriticalAddonsOnly=true:NoSchedule' + ] + } + { + name: 'npuser01' + count: 3 + vmSize: 'Standard_DS3_v2' + osDiskSizeGB: 120 + osDiskType: 'Ephemeral' + osType: 'Linux' + minCount: 2 + maxCount: 5 + vnetSubnetID: aksSubnetId + enableAutoScaling: true + type: 'VirtualMachineScaleSets' + mode: 'User' + scaleSetPriority: 'Regular' + scaleSetEvictionPolicy: 'Delete' + orchestratorVersion: '1.22.4' + enableNodePublicIP: false + maxPods: 30 + availabilityZones: [ + '1' + '2' + '3' + ] + upgradeSettings: { + maxSurge: '33%' + } + } + ] + servicePrincipalProfile: { + clientId: 'msi' + } + addonProfiles: { + httpApplicationRouting: { + enabled: false + } + omsagent: { + enabled: true + config: { + logAnalyticsWorkspaceResourceId: logAnalyticsWorkspace.id + } + } + aciConnectorLinux: { + enabled: false + } + azurepolicy: { + enabled: true + config: { + version: 'v2' + } + } + azureKeyvaultSecretsProvider: { + enabled: true + config: { + enableSecretRotation: 'false' + } + } + } + networkProfile: { + networkPolicy: 'azure' + networkPlugin: 'azure' + loadBalancerSku: 'standard' + outboundType: 'loadBalancer' + //outboundType: 'userDefinedRouting' + //loadBalancerProfile: json('null') + serviceCidr: '172.16.0.0/16' + dnsServiceIP: '172.16.0.10' + dockerBridgeCidr: '172.18.0.1/16' + } + aadProfile: { + managed: true + enableAzureRBAC: useAzureRBAC + adminGroupObjectIDs: !useAzureRBAC ? 
array(clusterAdminAadGroupObjectId) : []
+      tenantID: tenant().tenantId
+    }
+    autoScalerProfile: {
+      'balance-similar-node-groups': 'false'
+      expander: 'random'
+      'max-empty-bulk-delete': '10'
+      'max-graceful-termination-sec': '600'
+      'max-node-provision-time': '15m'
+      'max-total-unready-percentage': '45'
+      'new-pod-scale-up-delay': '0s'
+      'ok-total-unready-count': '3'
+      'scale-down-delay-after-add': '10m'
+      'scale-down-delay-after-delete': '20s'
+      'scale-down-delay-after-failure': '3m'
+      'scale-down-unneeded-time': '10m'
+      'scale-down-unready-time': '20m'
+      'scale-down-utilization-threshold': '0.5'
+      'scan-interval': '10s'
+      'skip-nodes-with-local-storage': 'true'
+      'skip-nodes-with-system-pods': 'true'
+    }
+    apiServerAccessProfile: {
+      authorizedIPRanges: [
+        aksAuthorizedIPRanges
+      ]
+      enablePrivateCluster: false
+    }
+    podIdentityProfile: {
+      enabled: false
+      userAssignedIdentities: []
+      userAssignedIdentityExceptions: []
+    }
+    disableLocalAccounts: true
+    securityProfile: {
+      azureDefender: {
+        enabled: true
+        logAnalyticsWorkspaceResourceId: logAnalyticsWorkspace.id
+      }
+    }
+    oidcIssuerProfile: {
+      enabled: true
+    }
+  }
+  dependsOn: [
+    aksPolicies
+  ]
+}
+
+module aksNodes 'aksNodes.bicep' = { // Passes the cluster's kubelet identity to the aksNodes.bicep module.
+  name: 'aksNodeSettings'
+  params: {
+    aksClusterKubeletIdentityPrincipalId: aks.properties.identityProfile.kubeletidentity.objectId // Symbolic reference (instead of reference(resourceId(...))) so this module is ordered after the aks resource.
+  }
+  scope: resourceGroup(aksNodeResourceGroup) // Module is deployed into the resource group named by aksNodeResourceGroup.
+}
+
+resource aksDiagnostics 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { // Sends the listed cluster log categories to the Log Analytics workspace.
+  name: 'Microsoft.Insights'
+  scope: aks
+  properties: {
+    workspaceId: logAnalyticsWorkspace.id
+    logs: [
+      {
+        category: 'cluster-autoscaler'
+        enabled: true
+      }
+      {
+        category: 'kube-controller-manager'
+        enabled: true
+      }
+      {
+        category: 'kube-audit-admin'
+        enabled: true
+      }
+      {
+        category: 'guard'
+        enabled: true
+      }
+    ]
+  }
+}
+
+module aksRBAC 'aksRBAC.bicep' = if (useAzureRBAC) { // Deployed only when useAzureRBAC is true.
+  name: 'aksRBAC'
+  params: {
+ aksClusterName: aksClusterName + clusterAdminAadGroupObjectId: clusterAdminAadGroupObjectId + clusterUserAadGroupObjectId: clusterUserAadGroupObjectId + aksIngressIdentityName: aksIngressIdentityName + userNamespaceName: applicationIdentifierTag + } + dependsOn: [ + aks + ] +} + +resource aksIngressDnsZone 'Microsoft.Network/privateDnsZones@2018-09-01' = { + name: aksIngressDomainName + location: 'global' + properties: {} +} + +resource aksIngressDnsZoneRecord 'Microsoft.Network/privateDnsZones/A@2018-09-01' = { + parent: aksIngressDnsZone + name: appSubDomainName + properties: { + ttl: 3600 + aRecords: [ + { + ipv4Address: aksIngressLoadBalancerIp + } + ] + } +} + +resource aksIngressDomainVNetLink 'Microsoft.Network/privateDnsZones/virtualNetworkLinks@2020-06-01' = { + parent: aksIngressDnsZone + name: 'to_aksvnet' + location: 'global' + properties: { + virtualNetwork: { + id: vnetId + } + registrationEnabled: false + } +} + +module aksPolicies 'aksPolicies.bicep' = { + name: 'aksPolicies' + params: { + acrName: acrName + aksClusterName: aksClusterName + } +} + +resource flux 'Microsoft.KubernetesConfiguration/extensions@2021-09-01' = { + scope: aks + name: 'flux' + properties: { + extensionType: 'microsoft.flux' + autoUpgradeMinorVersion: true + releaseTrain: 'Stable' + scope: { + cluster: { + releaseNamespace: 'flux-system' + } + } + configurationSettings: { + 'helm-controller.enabled': 'false' + 'source-controller.enabled': 'true' + 'kustomize-controller.enabled': 'true' + 'notification-controller.enabled': 'false' + 'image-automation-controller.enabled': 'false' + 'image-reflector-controller.enabled': 'false' + } + configurationProtectedSettings: {} + } + dependsOn: [ + aksAcrPullRoleAssignment + ] +} +resource fluxConfig 'Microsoft.KubernetesConfiguration/fluxConfigurations@2022-01-01-preview' = { + scope: aks + name: 'bootstrap' + properties: { + scope: 'cluster' + namespace: 'flux-system' + sourceKind: 'GitRepository' + gitRepository: { + url: 
fluxSettings.RepositoryUrl + timeoutInSeconds: 180 + syncIntervalInSeconds: 300 + repositoryRef: { + branch: fluxSettings.RepositoryBranch + tag: null + semver: null + commit: null + } + sshKnownHosts: '' + httpsUser: null + httpsCACert: null + localAuthRef: null + } + kustomizations: { + unified: { + path: fluxSettings.RepositorySubfolder + timeoutInSeconds: 300 + syncIntervalInSeconds: 300 + retryIntervalInSeconds: null + prune: true + force: false + } + } + } + dependsOn: [ + flux + aksAcrPullRoleAssignment + ] +} + +resource aksMetricsPublisherRole 'Microsoft.Authorization/roleAssignments@2020-08-01-preview' = { + name: guid('${aks.id}-omsagent-${monitoringMetricsPublisherRole}') + scope: aks + properties: { + principalId: reference(aks.id, '2020-12-01').addonProfiles.omsagent.identity.objectId + roleDefinitionId: monitoringMetricsPublisherRole + principalType: 'ServicePrincipal' + } +} + +resource aksContainerInsightsSolution 'Microsoft.OperationsManagement/solutions@2015-11-01-preview' = { + name: containerInsightsSolutionName + location: location + properties: { + workspaceResourceId: resourceId('Microsoft.OperationalInsights/workspaces', logAnalyticsWorkspaceName) + } + plan: { + name: containerInsightsSolutionName + product: 'OMSGallery/ContainerInsights' + promotionCode: '' + publisher: 'Microsoft' + } +} + +resource PodFailedScheduledQuery 'Microsoft.Insights/scheduledQueryRules@2018-04-16' = { + name: 'PodFailedScheduledQuery' + location: location + properties: { + description: 'Alert on pod Failed phase.' 
+ enabled: 'true' + source: { + query: '//https://docs.microsoft.com/azure/azure-monitor/insights/container-insights-alerts \r\n let endDateTime = now(); let startDateTime = ago(1h); let trendBinSize = 1m; let clusterName = "${aksClusterName}"; KubePodInventory | where TimeGenerated < endDateTime | where TimeGenerated >= startDateTime | where ClusterName == clusterName | distinct ClusterName, TimeGenerated | summarize ClusterSnapshotCount = count() by bin(TimeGenerated, trendBinSize), ClusterName | join hint.strategy=broadcast ( KubePodInventory | where TimeGenerated < endDateTime | where TimeGenerated >= startDateTime | distinct ClusterName, Computer, PodUid, TimeGenerated, PodStatus | summarize TotalCount = count(), PendingCount = sumif(1, PodStatus =~ "Pending"), RunningCount = sumif(1, PodStatus =~ "Running"), SucceededCount = sumif(1, PodStatus =~ "Succeeded"), FailedCount = sumif(1, PodStatus =~ "Failed") by ClusterName, bin(TimeGenerated, trendBinSize) ) on ClusterName, TimeGenerated | extend UnknownCount = TotalCount - PendingCount - RunningCount - SucceededCount - FailedCount | project TimeGenerated, TotalCount = todouble(TotalCount) / ClusterSnapshotCount, PendingCount = todouble(PendingCount) / ClusterSnapshotCount, RunningCount = todouble(RunningCount) / ClusterSnapshotCount, SucceededCount = todouble(SucceededCount) / ClusterSnapshotCount, FailedCount = todouble(FailedCount) / ClusterSnapshotCount, UnknownCount = todouble(UnknownCount) / ClusterSnapshotCount| summarize AggregatedValue = avg(FailedCount) by bin(TimeGenerated, trendBinSize)' + dataSourceId: resourceId('Microsoft.OperationalInsights/workspaces', logAnalyticsWorkspaceName) + queryType: 'ResultCount' + } + schedule: { + frequencyInMinutes: 5 + timeWindowInMinutes: 10 + } + action: { + 'odata.type': 'Microsoft.WindowsAzure.Management.Monitoring.Alerts.Models.Microsoft.AppInsights.Nexus.DataContracts.Resources.ScheduledQueryRules.AlertingAction' + severity: '3' + trigger: { + 
thresholdOperator: 'GreaterThan' + threshold: 3 + metricTrigger: { + thresholdOperator: 'GreaterThan' + threshold: 2 + metricTriggerType: 'Consecutive' + } + } + } + } + dependsOn: [ + aksContainerInsightsSolution + ] +} diff --git a/acr-aks-stamp/modules/aksNodes.bicep b/acr-aks-stamp/modules/aksNodes.bicep new file mode 100644 index 00000000..623a81cc --- /dev/null +++ b/acr-aks-stamp/modules/aksNodes.bicep @@ -0,0 +1,15 @@ +param aksClusterKubeletIdentityPrincipalId string + +var virtualMachineContributorRole = '${subscription().id}/providers/Microsoft.Authorization/roleDefinitions/9980e02c-c2be-4d73-94e8-173b1dc7cf3c' + +//It is required to grant the AKS cluster with Virtual Machine Contributor role permissions over +//the cluster infrastructure resource group to work with Managed Identities and aad-pod-identity. +//Otherwise MIC component fails while attempting to update MSI on VMSS cluster nodes +resource id 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = { + name: guid(resourceGroup().id) + properties: { + roleDefinitionId: virtualMachineContributorRole + principalId: aksClusterKubeletIdentityPrincipalId + principalType: 'ServicePrincipal' + } +} diff --git a/acr-aks-stamp/modules/aksPolicies.bicep b/acr-aks-stamp/modules/aksPolicies.bicep new file mode 100644 index 00000000..f12372ab --- /dev/null +++ b/acr-aks-stamp/modules/aksPolicies.bicep @@ -0,0 +1,162 @@ +param aksClusterName string +param acrName string + +var policyResourceIdAKSLinuxRestrictive = '/providers/Microsoft.Authorization/policySetDefinitions/42b8ef37-b724-4e24-bbc8-7a7708edfe00' +var policyResourceIdEnforceHttpsIngress = '/providers/Microsoft.Authorization/policyDefinitions/1a5b4dca-0b6f-4cf5-907c-56316bc1bf3d' +var policyResourceIdEnforceInternalLoadBalancers = '/providers/Microsoft.Authorization/policyDefinitions/3fc4dc25-5baf-40d8-9b05-7fe74c1bc64e' +var policyResourceIdRoRootFilesystem = 
'/providers/Microsoft.Authorization/policyDefinitions/df49d893-a74c-421d-bc95-c663042e5b80' +var policyResourceIdEnforceResourceLimits = '/providers/Microsoft.Authorization/policyDefinitions/e345eecc-fa47-480f-9e88-67dcc122b164' +var policyResourceIdEnforceImageSource = '/providers/Microsoft.Authorization/policyDefinitions/febd0533-8e55-448f-b837-bd0e06f16469' +var policyResourceIdEnforceDefenderInCluster = '/providers/Microsoft.Authorization/policyDefinitions/a1840de2-8088-4ea8-b153-b4c723e9cb01' +var policyAssignmentNameAKSLinuxRestrictive = guid(policyResourceIdAKSLinuxRestrictive, resourceGroup().name, aksClusterName) +var policyAssignmentNameEnforceHttpsIngress = guid(policyResourceIdEnforceHttpsIngress, resourceGroup().name, aksClusterName) +var policyAssignmentNameEnforceInternalLoadBalancers = guid(policyResourceIdEnforceInternalLoadBalancers, resourceGroup().name, aksClusterName) +var policyAssignmentNameRoRootFilesystem = guid(policyResourceIdRoRootFilesystem, resourceGroup().name, aksClusterName) +var policyAssignmentNameEnforceResourceLimits = guid(policyResourceIdEnforceResourceLimits, resourceGroup().name, aksClusterName) +var policyAssignmentNameEnforceImageSource = guid(policyResourceIdEnforceImageSource, resourceGroup().name, aksClusterName) +var policyAssignmentNameEnforceDefenderInCluster = guid(policyResourceIdEnforceDefenderInCluster, resourceGroup().name, aksClusterName) + +resource policyAssignmentAKSLinuxRestrictive 'Microsoft.Authorization/policyAssignments@2020-09-01' = { + name: policyAssignmentNameAKSLinuxRestrictive + properties: { + displayName: '[${aksClusterName}] ${reference(policyResourceIdAKSLinuxRestrictive, '2020-09-01').displayName}' + scope: subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name) + policyDefinitionId: policyResourceIdAKSLinuxRestrictive + parameters: { + excludedNamespaces: { + value: [ + 'kube-system' + 'gatekeeper-system' + 'azure-arc' + 'cluster-baseline-settings' + ] + } + 
effect: { + value: 'audit' + } + } + } +} + +resource policyAssignmentEnforceHttpsIngress 'Microsoft.Authorization/policyAssignments@2020-09-01' = { + name: policyAssignmentNameEnforceHttpsIngress + properties: { + displayName: '[${aksClusterName}] ${reference(policyResourceIdEnforceHttpsIngress, '2020-09-01').displayName}' + scope: subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name) + policyDefinitionId: policyResourceIdEnforceHttpsIngress + parameters: { + excludedNamespaces: { + value: [] + } + effect: { + value: 'deny' + } + } + } +} + +resource policyAssignmentEnforceInternalLoadBalancers 'Microsoft.Authorization/policyAssignments@2020-09-01' = { + name: policyAssignmentNameEnforceInternalLoadBalancers + properties: { + displayName: '[${aksClusterName}] ${reference(policyResourceIdEnforceInternalLoadBalancers, '2020-09-01').displayName}' + scope: subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name) + policyDefinitionId: policyResourceIdEnforceInternalLoadBalancers + parameters: { + excludedNamespaces: { + value: [] + } + effect: { + value: 'deny' + } + } + } +} + +resource policyAssignmentRoRootFilesystem 'Microsoft.Authorization/policyAssignments@2020-09-01' = { + name: policyAssignmentNameRoRootFilesystem + properties: { + displayName: '[${aksClusterName}] ${reference(policyResourceIdRoRootFilesystem, '2020-09-01').displayName}' + scope: subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name) + policyDefinitionId: policyResourceIdRoRootFilesystem + parameters: { + excludedNamespaces: { + value: [ + 'kube-system' + 'gatekeeper-system' + 'azure-arc' + ] + } + effect: { + value: 'audit' + } + } + } +} + +resource policyAssignmentEnforceResourceLimits 'Microsoft.Authorization/policyAssignments@2020-09-01' = { + name: policyAssignmentNameEnforceResourceLimits + properties: { + displayName: '[${aksClusterName}] ${reference(policyResourceIdEnforceResourceLimits, 
'2020-09-01').displayName}'
+    scope: subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name)
+    policyDefinitionId: policyResourceIdEnforceResourceLimits
+    parameters: {
+      cpuLimit: {
+        value: '1000m'
+      }
+      memoryLimit: {
+        value: '512Mi'
+      }
+      excludedNamespaces: {
+        value: [
+          'kube-system'
+          'gatekeeper-system'
+          'azure-arc'
+          'cluster-baseline-settings'
+          'flux-system'
+        ]
+      }
+      effect: {
+        value: 'deny'
+      }
+    }
+  }
+}
+
+resource policyAssignmentEnforceImageSource 'Microsoft.Authorization/policyAssignments@2020-09-01' = { // Denies containers whose image does not match the allowed-registry regex below.
+  name: policyAssignmentNameEnforceImageSource
+  properties: {
+    displayName: '[${aksClusterName}] ${reference(policyResourceIdEnforceImageSource, '2020-09-01').displayName}'
+    scope: subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name)
+    policyDefinitionId: policyResourceIdEnforceImageSource
+    parameters: {
+      allowedContainerImagesRegex: {
+        value: '${acrName}\\.azurecr\\.io/.+$|mcr\\.microsoft\\.com/.+$|azurearcfork8s\\.azurecr\\.io/azurearcflux/images/stable/.+$|docker\\.io/weaveworks/kured.+$|docker\\.io/library/.+$' // Dots in host names are escaped ('\\.' is a literal backslash + dot) so '.' no longer matches any character.
+      }
+      excludedNamespaces: {
+        value: [
+          'kube-system'
+          'gatekeeper-system'
+          'azure-arc'
+        ]
+      }
+      effect: {
+        value: 'deny'
+      }
+    }
+  }
+}
+
+resource policyAssignmentEnforceDefenderInCluster 'Microsoft.Authorization/policyAssignments@2020-09-01' = { // Assignment of policyResourceIdEnforceDefenderInCluster at resource-group scope.
+  name: policyAssignmentNameEnforceDefenderInCluster
+  properties: {
+    displayName: '[${aksClusterName}] ${reference(policyResourceIdEnforceDefenderInCluster, '2020-09-01').displayName}'
+    description: 'Microsoft Defender for Containers should be enabled in the cluster.'
+ scope: subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name) + policyDefinitionId: policyResourceIdEnforceDefenderInCluster + parameters: { + effect: { + value: 'Audit' + } + } + } +} diff --git a/acr-aks-stamp/modules/aksRBAC.bicep b/acr-aks-stamp/modules/aksRBAC.bicep new file mode 100644 index 00000000..3b1be9c8 --- /dev/null +++ b/acr-aks-stamp/modules/aksRBAC.bicep @@ -0,0 +1,77 @@ +param aksClusterName string +param clusterUserAadGroupObjectId string +param clusterAdminAadGroupObjectId string +param aksIngressIdentityName string +param userNamespaceName string + +var managedIdentityOperatorRole = '${subscription().id}/providers/Microsoft.Authorization/roleDefinitions/f1a07417-d97a-45cb-824c-7a7467783830' +var clusterAdminRoleId = 'b1ff04bb-8a4e-4dc4-8eb5-8693973ce19b' +var clusterReaderRoleId = '7f6c6a51-bcf8-42ba-9220-52d62157d7db' +var serviceClusterUserRoleId = '4abbcc35-e782-43d8-92c5-2d3f1bd2253f' + +resource aks 'Microsoft.ContainerService/managedClusters@2021-10-01' existing = { + name: aksClusterName +} + +resource aksUserNamespace 'Microsoft.ContainerService/managedClusters/namespaces@2021-10-01' existing = { + parent: aks + name: userNamespaceName +} + +resource aksIngressIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' existing = { + name: aksIngressIdentityName +} + +resource aksClusterAdmins 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = { + scope: aks + name: guid('aad-admin-group', aks.id, clusterAdminAadGroupObjectId) + properties: { + roleDefinitionId: '/subscriptions/${subscription().subscriptionId}/providers/Microsoft.Authorization/roleDefinitions/${clusterAdminRoleId}' + description: 'Members of this group are cluster admins of this cluster.' 
+    principalId: clusterAdminAadGroupObjectId
+    principalType: 'Group'
+  }
+}
+
+resource aksServiceClusterAdmins 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = { // Assigns serviceClusterUserRoleId on the cluster to the admin AAD group.
+  scope: aks
+  name: guid('aad-admin-group-sc', aks.id, clusterAdminAadGroupObjectId)
+  properties: {
+    roleDefinitionId: '/subscriptions/${subscription().subscriptionId}/providers/Microsoft.Authorization/roleDefinitions/${serviceClusterUserRoleId}'
+    description: 'Members of this group are cluster users of this cluster.'
+    principalId: clusterAdminAadGroupObjectId
+    principalType: 'Group'
+  }
+}
+
+resource aksUserNamespaceAdmins 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = if (clusterUserAadGroupObjectId != clusterAdminAadGroupObjectId) { // Skipped when the user group and the admin group are the same object.
+  scope: aksUserNamespace
+  name: guid('aad-${userNamespaceName}-reader-group', aks.id, clusterUserAadGroupObjectId)
+  properties: {
+    roleDefinitionId: '/subscriptions/${subscription().subscriptionId}/providers/Microsoft.Authorization/roleDefinitions/${clusterReaderRoleId}'
+    principalId: clusterUserAadGroupObjectId
+    description: 'Members of this group are readers of the ${userNamespaceName} namespace in this cluster.' // Matches the assigned role (clusterReaderRoleId) and the parameterized namespace scope.
+    principalType: 'Group'
+  }
+}
+
+resource aksServiceClusterUsers 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = if (clusterUserAadGroupObjectId != clusterAdminAadGroupObjectId) { // Assigns serviceClusterUserRoleId on the cluster to the user AAD group.
+  scope: aks
+  name: guid('aad-${userNamespaceName}-reader-group-sc', aks.id, clusterUserAadGroupObjectId)
+  properties: {
+    roleDefinitionId: '/subscriptions/${subscription().subscriptionId}/providers/Microsoft.Authorization/roleDefinitions/${serviceClusterUserRoleId}'
+    principalId: clusterUserAadGroupObjectId
+    description: 'Members of this group are cluster users of this cluster.'
+ principalType: 'Group' + } +} + +resource aksManagedIdentityOperator 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = { + scope: aksIngressIdentity + name: guid('podmi-ingress-controller/Microsoft.Authorization', resourceGroup().id, aksIngressIdentityName, managedIdentityOperatorRole) + properties: { + roleDefinitionId: managedIdentityOperatorRole + principalId: reference(aks.id, '2020-11-01').identityProfile.kubeletidentity.objectId + principalType: 'ServicePrincipal' + } +} diff --git a/acr-aks-stamp/modules/appgw.bicep b/acr-aks-stamp/modules/appgw.bicep new file mode 100644 index 00000000..d0613ea3 --- /dev/null +++ b/acr-aks-stamp/modules/appgw.bicep @@ -0,0 +1,218 @@ +param location string +param appGWName string +param appGWIdentityName string +param appGWListenerCertificateSecretId string +param aksIngressCertificateSecretId string +param appGWSubnetId string +param appGWHostName string +param aksBackendDomainName string +param logAnalyticsWorkspaceName string +param trustedRootCertificatesRequired bool + +var appGWPublicIpName = 'ip-${appGWName}' + +resource logAnalyticsWorkspace 'Microsoft.OperationalInsights/workspaces@2020-10-01' existing = { + name: logAnalyticsWorkspaceName +} + +resource appGWPublicIp 'Microsoft.Network/publicIPAddresses@2021-05-01' = { + name: appGWPublicIpName + location: location + zones: [ + '1' + '2' + '3' + ] + sku: { + name: 'Standard' + } + properties: { + publicIPAllocationMethod: 'Static' + } +} + +resource appgw 'Microsoft.Network/applicationGateways@2021-05-01' = { + name: appGWName + location: location + identity: { + type: 'UserAssigned' + userAssignedIdentities: { + '${resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', appGWIdentityName)}': { + } + } + } + zones: [ + '1' + '2' + '3' + ] + properties: { + enableHttp2: false + sku: { + name: 'Standard_v2' + tier: 'Standard_v2' + } + sslPolicy: { + policyType: 'Custom' + cipherSuites: [ + 'TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384' + 
'TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256' + ] + minProtocolVersion: 'TLSv1_2' + } + trustedRootCertificates: trustedRootCertificatesRequired ? [ + { + name: 'root-cert-wildcard-aks-ingress' + properties: { + keyVaultSecretId: aksIngressCertificateSecretId + } + } + ] : [] + sslCertificates: [ + { + name: 'ssl-certificate' + properties: { + keyVaultSecretId: appGWListenerCertificateSecretId + } + } + ] + frontendIPConfigurations:[ + { + name: '${appGWName}-Frontend' + properties: { + publicIPAddress: { + id: appGWPublicIp.id + } + } + } + ] + gatewayIPConfigurations: [ + { + name: '${appGWName}-Gateway' + properties:{ + subnet: { + id: appGWSubnetId + } + } + } + ] + autoscaleConfiguration:{ + minCapacity: 0 + maxCapacity: 2 + } + frontendPorts: [ + { + name: 'HTTPS' + properties: { + port: 443 + } + } + ] + httpListeners: [ + { + name: 'listener-https' + properties:{ + protocol: 'Https' + frontendIPConfiguration: { + id: resourceId('Microsoft.Network/applicationGateways/frontendIPConfigurations', appGWName, '${appGWName}-Frontend') + } + frontendPort: { + id: resourceId('Microsoft.Network/applicationGateways/frontendPorts', appGWName, 'HTTPS') + } + sslCertificate: { + id: resourceId('Microsoft.Network/applicationGateways/sslCertificates', appGWName, 'ssl-certificate') + } + hostName: appGWHostName + hostNames: [] + requireServerNameIndication: true + } + } + ] + requestRoutingRules: [ + { + name: '${appGWName}-RoutingRule' + properties: { + ruleType: 'Basic' + httpListener: { + id: resourceId('Microsoft.Network/applicationGateways/httpListeners', appGWName, 'listener-https') + } + backendAddressPool: { + id: resourceId('Microsoft.Network/applicationGateways/backendAddressPools', appGWName, aksBackendDomainName) + } + backendHttpSettings: { + id: resourceId('Microsoft.Network/applicationGateways/backendHttpSettingsCollection', appGWName, '${appGWName}-HttpSettings') + } + } + } + ] + probes: [ + { + name: 'aks-probe' + properties: { + protocol: 'Https' + path: '/' + 
interval: 30 + timeout: 30 + unhealthyThreshold: 3 + pickHostNameFromBackendHttpSettings: true + minServers: 0 + match: {} + } + } + ] + backendHttpSettingsCollection: [ + { + name: '${appGWName}-HttpSettings' + properties: { + requestTimeout: 20 + protocol: 'Https' + port: 443 + pickHostNameFromBackendAddress: true + cookieBasedAffinity: 'Disabled' + probe: { + id: resourceId('Microsoft.Network/applicationGateways/probes', appGWName, 'aks-probe') + } + trustedRootCertificates: trustedRootCertificatesRequired ? [ + { + id: resourceId('Microsoft.Network/applicationGateways/trustedRootCertificates', appGWName, 'root-cert-wildcard-aks-ingress') + } + ] : [] + } + } + ] + backendAddressPools: [ + { + name: aksBackendDomainName + properties: { + backendAddresses: [ + { + fqdn: aksBackendDomainName + } + ] + } + } + ] + } +} + +resource appGWDiagnostics 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { + name: 'Microsoft.Insights' + scope: appgw + properties: { + workspaceId: logAnalyticsWorkspace.id + logs: [ + { + category: 'ApplicationGatewayAccessLog' + enabled: true + } + { + category: 'ApplicationGatewayPerformanceLog' + enabled: true + } + { + category: 'ApplicationGatewayFirewallLog' + enabled: true + } + ] + } +} diff --git a/acr-aks-stamp/modules/keyvault.bicep b/acr-aks-stamp/modules/keyvault.bicep new file mode 100644 index 00000000..c1c61314 --- /dev/null +++ b/acr-aks-stamp/modules/keyvault.bicep @@ -0,0 +1,190 @@ +param vnetId string +param privateLinkSubnetId string +param keyVaultName string +param location string +param appGWListenerCertificate string +param aksIngressCertificate string +param appGWIdentityPrincipalId string +param aksIngressIdentityPrincipalId string +param logAnalyticsWorkspaceName string + +var keyVaultSecretsUserRole = '${subscription().id}/providers/Microsoft.Authorization/roleDefinitions/4633458b-17de-408a-b874-0445c86b69e6' +var keyVaultUserRole = 
'${subscription().id}/providers/Microsoft.Authorization/roleDefinitions/21090545-7ca7-4776-b22c-e363652d74d2' +var keyVaultPrivateDnsZoneName = 'privatelink.vaultcore.azure.net' + +resource logAnalyticsWorkspace 'Microsoft.OperationalInsights/workspaces@2020-10-01' existing = { + name: logAnalyticsWorkspaceName +} + +resource keyVault 'Microsoft.KeyVault/vaults@2021-06-01-preview' = { + name: keyVaultName + location: location + properties: { + accessPolicies: [] + sku: { + family: 'A' + name: 'standard' + } + tenantId: tenant().tenantId + networkAcls: { + bypass: 'AzureServices' + defaultAction: 'Deny' + ipRules: [] + virtualNetworkRules: [] + } + enableRbacAuthorization: true + enabledForDeployment: false + enabledForDiskEncryption: false + enabledForTemplateDeployment: false + enableSoftDelete: true + } + resource appGWListenerCertificateSecret 'secrets@2021-06-01-preview' = { + name: 'gateway-ssl-cert' + properties: { + value: appGWListenerCertificate + } + } + + resource aksIngressCertificateSecret 'secrets@2021-06-01-preview' = if (!empty(aksIngressCertificate)) { + name: 'appgw-aks-ingress-tls-cert' + properties: { + value: aksIngressCertificate + } + } + +} + +// Grant the Azure Application Gateway managed identity with key vault secret reader role permissions; this allows pulling frontend and backend certificates. +resource appGWKvSecretReaderRoleAssignment 'Microsoft.Authorization/roleAssignments@2020-08-01-preview' = { + name: guid('${appGWIdentityPrincipalId}-${keyVault.id}-keyvault-secrets-roleassignment') + scope: keyVault + properties: { + principalId: appGWIdentityPrincipalId + roleDefinitionId: keyVaultSecretsUserRole + principalType: 'ServicePrincipal' + } +} + +// Grant the Azure Application Gateway managed identity with key vault reader role permissions; this allows pulling frontend and backend certificates. 
+resource appGWKvReaderRoleAssignment 'Microsoft.Authorization/roleAssignments@2020-08-01-preview' = { + name: guid('${appGWIdentityPrincipalId}-${keyVault.id}-keyvault-roleassignment') + scope: keyVault + properties: { + principalId: appGWIdentityPrincipalId + roleDefinitionId: keyVaultUserRole + principalType: 'ServicePrincipal' + } +} + +// Grant the AKS cluster ingress controller pod managed identity with key vault secret reader role permissions; this allows our ingress controller to pull certificates. +resource aksIngressKvSecretReaderRoleAssignment 'Microsoft.Authorization/roleAssignments@2020-08-01-preview' = { + name: guid('${aksIngressIdentityPrincipalId}-${keyVault.id}-keyvault-secrets-roleassignment') + scope: keyVault + properties: { + principalId: aksIngressIdentityPrincipalId + roleDefinitionId: keyVaultSecretsUserRole + principalType: 'ServicePrincipal' + } +} + +// Grant the AKS cluster ingress controller pod managed identity with key vault reader role permissions; this allows our ingress controller to pull certificates. 
+resource aksIngressKvReaderRoleAssignment 'Microsoft.Authorization/roleAssignments@2020-08-01-preview' = { + name: guid('${aksIngressIdentityPrincipalId}-${keyVault.id}-keyvault-roleassignment') + scope: keyVault + properties: { + principalId: aksIngressIdentityPrincipalId + roleDefinitionId: keyVaultUserRole + principalType: 'ServicePrincipal' + } +} + +resource keyVaultDiagnostics 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { + name: 'Microsoft.Insights' + scope: keyVault + properties: { + workspaceId: logAnalyticsWorkspace.id + logs: [ + { + category: 'AuditEvent' + enabled: true + } + ] + metrics: [ + { + category: 'AllMetrics' + enabled: true + } + ] + } +} + +resource keyVaultPrivateLink 'Microsoft.Network/privateEndpoints@2020-05-01' = { + name: 'akv-to-aksvnet' + location: location + properties: { + subnet: { + id: privateLinkSubnetId + } + privateLinkServiceConnections: [ + { + name: 'nodepools' + properties: { + privateLinkServiceId: keyVault.id + groupIds: [ + 'vault' + ] + } + } + ] + } +} + +resource keyVaultPrivateLinkDnsZone 'Microsoft.Network/privateEndpoints/privateDnsZoneGroups@2020-05-01' = { + parent: keyVaultPrivateLink + name: 'default' + properties: { + privateDnsZoneConfigs: [ + { + name: 'privatelink-akv-net' + properties: { + privateDnsZoneId: keyVaultPrivateDnsZone.id + } + } + ] + } +} + +resource keyVaultPrivateDnsZone 'Microsoft.Network/privateDnsZones@2018-09-01' = { + name: keyVaultPrivateDnsZoneName + location: 'global' + properties: {} +} + +resource keyVaultPrivateDnsZoneLink 'Microsoft.Network/privateDnsZones/virtualNetworkLinks@2020-06-01' = { + parent: keyVaultPrivateDnsZone + name: 'to_aksvnet' + location: 'global' + properties: { + virtualNetwork: { + id: vnetId + } + registrationEnabled: false + } +} + +resource keyVaultAnalyticsSolution 'Microsoft.OperationsManagement/solutions@2015-11-01-preview' = { + name: 'KeyVaultAnalytics(${logAnalyticsWorkspaceName})' + location: location + properties: { + 
workspaceResourceId: resourceId('Microsoft.OperationalInsights/workspaces', logAnalyticsWorkspaceName) + } + plan: { + name: 'KeyVaultAnalytics(${logAnalyticsWorkspaceName})' + product: 'OMSGallery/KeyVaultAnalytics' + promotionCode: '' + publisher: 'Microsoft' + } +} + +output appGWListenerCertificateSecretId string = keyVault::appGWListenerCertificateSecret.properties.secretUri +output aksIngressCertificateSecretId string = !empty(aksIngressCertificate) ? keyVault::aksIngressCertificateSecret.properties.secretUri : '' diff --git a/acr-aks-stamp/modules/managedIdentities.bicep b/acr-aks-stamp/modules/managedIdentities.bicep new file mode 100644 index 00000000..02baeb6c --- /dev/null +++ b/acr-aks-stamp/modules/managedIdentities.bicep @@ -0,0 +1,27 @@ +// Names of the user-assigned managed identities created by this module (resource names, not secrets). +param aksControlPlaneIdentityName string +param appGWIdentityName string +param aksIngressIdentityName string +param location string + +// The control plane identity used by the cluster. Used for networking access (VNET joining and DNS updating) +resource aksControlPlaneIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' = { + name: aksControlPlaneIdentityName + location: location +} + +// User Managed Identity that App Gateway is assigned. Used for Azure Key Vault Access. +resource appGWIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' = { + name: appGWIdentityName + location: location +} + +// User Managed Identity for the cluster's ingress controller pods. Used for Azure Key Vault Access. 
+resource aksIngressIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' = { + name: aksIngressIdentityName + location: location +} + +output aksIngressIdentityPrincipalId string = aksIngressIdentity.properties.principalId +output appGWIdentityPrincipalId string = appGWIdentity.properties.principalId +output aksControlPlaneIdentityPrincipalId string = aksControlPlaneIdentity.properties.principalId diff --git a/acr-aks-stamp/modules/monitoring.bicep b/acr-aks-stamp/modules/monitoring.bicep new file mode 100644 index 00000000..49bb2ba2 --- /dev/null +++ b/acr-aks-stamp/modules/monitoring.bicep @@ -0,0 +1,694 @@ +param location string +param aksClusterName string +param logAnalyticsWorkspaceName string + +var containerInsightsSolutionName = 'ContainerInsights(${logAnalyticsWorkspaceName})' + +resource aks 'Microsoft.ContainerService/managedClusters@2021-10-01' existing = { + name: aksClusterName +} + +resource logAnalyticsWorkspace 'Microsoft.OperationalInsights/workspaces@2020-10-01' existing = { + name: logAnalyticsWorkspaceName +} + +resource containerInsights 'Microsoft.OperationsManagement/solutions@2015-11-01-preview' existing = { + name: containerInsightsSolutionName +} + +resource aksEventGridTopic 'Microsoft.EventGrid/systemTopics@2020-10-15-preview' = { + name: aksClusterName + location: location + properties: { + source: resourceId('Microsoft.ContainerService/managedClusters', aksClusterName) + topicType: 'Microsoft.ContainerService.ManagedClusters' + } + dependsOn: [ + aks + ] +} + +resource aksEventGridTopicDiagnostics 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { + name: 'Microsoft.Insights' + scope: aksEventGridTopic + properties: { + workspaceId: logAnalyticsWorkspace.id + logs: [ + { + category: 'DeliveryFailures' + enabled: true + } + ] + metrics: [ + { + category: 'AllMetrics' + enabled: true + } + ] + } +} + +resource Node_CPU_utilization_high_for_clusterName_CI_1 'Microsoft.Insights/metricAlerts@2018-03-01' = { + 
name: 'Node CPU utilization high for ${aksClusterName} CI-1' + location: 'global' + properties: { + actions: [] + criteria: { + allOf: [ + { + criterionType: 'StaticThresholdCriterion' + dimensions: [ + { + name: 'host' + operator: 'Include' + values: [ + '*' + ] + } + ] + metricName: 'cpuUsagePercentage' + metricNamespace: 'Insights.Container/nodes' + name: 'Metric1' + operator: 'GreaterThan' + threshold: 80 + timeAggregation: 'Average' + skipMetricValidation: true + } + ] + 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' + } + description: 'Node CPU utilization across the cluster.' + enabled: true + evaluationFrequency: 'PT1M' + scopes: [ + aks.id + ] + severity: 3 + targetResourceType: 'microsoft.containerservice/managedclusters' + windowSize: 'PT5M' + } + dependsOn: [ + aks + containerInsights + ] +} + +resource Node_working_set_memory_utilization_high_for_clusterName_CI_2 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: 'Node working set memory utilization high for ${aksClusterName} CI-2' + location: 'global' + properties: { + actions: [] + criteria: { + allOf: [ + { + criterionType: 'StaticThresholdCriterion' + dimensions: [ + { + name: 'host' + operator: 'Include' + values: [ + '*' + ] + } + ] + metricName: 'memoryWorkingSetPercentage' + metricNamespace: 'Insights.Container/nodes' + name: 'Metric1' + operator: 'GreaterThan' + threshold: 80 + timeAggregation: 'Average' + skipMetricValidation: true + } + ] + 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' + } + description: 'Node working set memory utilization across the cluster.' 
+ enabled: true + evaluationFrequency: 'PT1M' + scopes: [ + aks.id + ] + severity: 3 + targetResourceType: 'microsoft.containerservice/managedclusters' + windowSize: 'PT5M' + } + dependsOn: [ + aks + containerInsights + ] +} + +resource Jobs_completed_more_than_6_hours_ago_for_clusterName_CI_11 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: 'Jobs completed more than 6 hours ago for ${aksClusterName} CI-11' + location: 'global' + properties: { + actions: [] + criteria: { + allOf: [ + { + criterionType: 'StaticThresholdCriterion' + dimensions: [ + { + name: 'controllerName' + operator: 'Include' + values: [ + '*' + ] + } + { + name: 'kubernetes namespace' + operator: 'Include' + values: [ + '*' + ] + } + ] + metricName: 'completedJobsCount' + metricNamespace: 'Insights.Container/pods' + name: 'Metric1' + operator: 'GreaterThan' + threshold: 0 + timeAggregation: 'Average' + skipMetricValidation: true + } + ] + 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' + } + description: 'This alert monitors completed jobs (more than 6 hours ago).' 
+ enabled: true + evaluationFrequency: 'PT1M' + scopes: [ + aks.id + ] + severity: 3 + targetResourceType: 'microsoft.containerservice/managedclusters' + windowSize: 'PT1M' + } + dependsOn: [ + aks + containerInsights + ] +} + +resource Container_CPU_usage_high_for_clusterName_CI_9 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: 'Container CPU usage high for ${aksClusterName} CI-9' + location: 'global' + properties: { + actions: [] + criteria: { + allOf: [ + { + criterionType: 'StaticThresholdCriterion' + dimensions: [ + { + name: 'controllerName' + operator: 'Include' + values: [ + '*' + ] + } + { + name: 'kubernetes namespace' + operator: 'Include' + values: [ + '*' + ] + } + ] + metricName: 'cpuExceededPercentage' + metricNamespace: 'Insights.Container/containers' + name: 'Metric1' + operator: 'GreaterThan' + threshold: 90 + timeAggregation: 'Average' + skipMetricValidation: true + } + ] + 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' + } + description: 'This alert monitors container CPU utilization.' 
+ enabled: true + evaluationFrequency: 'PT1M' + scopes: [ + aks.id + ] + severity: 3 + targetResourceType: 'microsoft.containerservice/managedclusters' + windowSize: 'PT5M' + } + dependsOn: [ + aks + containerInsights + ] +} + +resource Container_working_set_memory_usage_high_for_clusterName_CI_10 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: 'Container working set memory usage high for ${aksClusterName} CI-10' + location: 'global' + properties: { + actions: [] + criteria: { + allOf: [ + { + criterionType: 'StaticThresholdCriterion' + dimensions: [ + { + name: 'controllerName' + operator: 'Include' + values: [ + '*' + ] + } + { + name: 'kubernetes namespace' + operator: 'Include' + values: [ + '*' + ] + } + ] + metricName: 'memoryWorkingSetExceededPercentage' + metricNamespace: 'Insights.Container/containers' + name: 'Metric1' + operator: 'GreaterThan' + threshold: 90 + timeAggregation: 'Average' + skipMetricValidation: true + } + ] + 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' + } + description: 'This alert monitors container working set memory utilization.' 
+ enabled: true + evaluationFrequency: 'PT1M' + scopes: [ + aks.id + ] + severity: 3 + targetResourceType: 'microsoft.containerservice/managedclusters' + windowSize: 'PT5M' + } + dependsOn: [ + aks + containerInsights + ] +} + +resource Pods_in_failed_state_for_clusterName_CI_4 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: 'Pods in failed state for ${aksClusterName} CI-4' + location: 'global' + properties: { + actions: [] + criteria: { + allOf: [ + { + criterionType: 'StaticThresholdCriterion' + dimensions: [ + { + name: 'phase' + operator: 'Include' + values: [ + 'Failed' + ] + } + ] + metricName: 'podCount' + metricNamespace: 'Insights.Container/pods' + name: 'Metric1' + operator: 'GreaterThan' + threshold: 0 + timeAggregation: 'Average' + skipMetricValidation: true + } + ] + 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' + } + description: 'Pod status monitoring.' + enabled: true + evaluationFrequency: 'PT1M' + scopes: [ + aks.id + ] + severity: 3 + targetResourceType: 'microsoft.containerservice/managedclusters' + windowSize: 'PT5M' + } + dependsOn: [ + aks + containerInsights + ] +} + +resource Disk_usage_high_for_clusterName_CI_5 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: 'Disk usage high for ${aksClusterName} CI-5' + location: 'global' + properties: { + actions: [] + criteria: { + allOf: [ + { + criterionType: 'StaticThresholdCriterion' + dimensions: [ + { + name: 'host' + operator: 'Include' + values: [ + '*' + ] + } + { + name: 'device' + operator: 'Include' + values: [ + '*' + ] + } + ] + metricName: 'DiskUsedPercentage' + metricNamespace: 'Insights.Container/nodes' + name: 'Metric1' + operator: 'GreaterThan' + threshold: 80 + timeAggregation: 'Average' + skipMetricValidation: true + } + ] + 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' + } + description: 'This alert monitors disk usage for all nodes and storage devices.' 
+ enabled: true + evaluationFrequency: 'PT1M' + scopes: [ + aks.id + ] + severity: 3 + targetResourceType: 'microsoft.containerservice/managedclusters' + windowSize: 'PT5M' + } + dependsOn: [ + aks + containerInsights + ] +} + +resource Nodes_in_not_ready_status_for_clusterName_CI_3 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: 'Nodes in not ready status for ${aksClusterName} CI-3' + location: 'global' + properties: { + actions: [] + criteria: { + allOf: [ + { + criterionType: 'StaticThresholdCriterion' + dimensions: [ + { + name: 'status' + operator: 'Include' + values: [ + 'NotReady' + ] + } + ] + metricName: 'nodesCount' + metricNamespace: 'Insights.Container/nodes' + name: 'Metric1' + operator: 'GreaterThan' + threshold: 0 + timeAggregation: 'Average' + skipMetricValidation: true + } + ] + 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' + } + description: 'Node status monitoring.' + enabled: true + evaluationFrequency: 'PT1M' + scopes: [ + aks.id + ] + severity: 3 + targetResourceType: 'microsoft.containerservice/managedclusters' + windowSize: 'PT5M' + } + dependsOn: [ + aks + containerInsights + ] +} + +resource Containers_getting_OOM_killed_for_clusterName_CI_6 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: 'Containers getting OOM killed for ${aksClusterName} CI-6' + location: 'global' + properties: { + actions: [] + criteria: { + allOf: [ + { + criterionType: 'StaticThresholdCriterion' + dimensions: [ + { + name: 'kubernetes namespace' + operator: 'Include' + values: [ + '*' + ] + } + { + name: 'controllerName' + operator: 'Include' + values: [ + '*' + ] + } + ] + metricName: 'oomKilledContainerCount' + metricNamespace: 'Insights.Container/pods' + name: 'Metric1' + operator: 'GreaterThan' + threshold: 0 + timeAggregation: 'Average' + skipMetricValidation: true + } + ] + 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' + } + description: 'This alert monitors number of containers 
killed due to out of memory (OOM) error.' + enabled: true + evaluationFrequency: 'PT1M' + scopes: [ + aks.id + ] + severity: 3 + targetResourceType: 'microsoft.containerservice/managedclusters' + windowSize: 'PT1M' + } + dependsOn: [ + aks + containerInsights + ] +} + +resource Persistent_volume_usage_high_for_clusterName_CI_18 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: 'Persistent volume usage high for ${aksClusterName} CI-18' + location: 'global' + properties: { + actions: [] + criteria: { + allOf: [ + { + criterionType: 'StaticThresholdCriterion' + dimensions: [ + { + name: 'podName' + operator: 'Include' + values: [ + '*' + ] + } + { + name: 'kubernetesNamespace' + operator: 'Include' + values: [ + '*' + ] + } + ] + metricName: 'pvUsageExceededPercentage' + metricNamespace: 'Insights.Container/persistentvolumes' + name: 'Metric1' + operator: 'GreaterThan' + threshold: 80 + timeAggregation: 'Average' + skipMetricValidation: true + } + ] + 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' + } + description: 'This alert monitors persistent volume utilization.' 
+ enabled: false + evaluationFrequency: 'PT1M' + scopes: [ + aks.id + ] + severity: 3 + targetResourceType: 'microsoft.containerservice/managedclusters' + windowSize: 'PT5M' + } + dependsOn: [ + aks + containerInsights + ] +} + +resource Pods_not_in_ready_state_for_clusterName_CI_8 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: 'Pods not in ready state for ${aksClusterName} CI-8' + location: 'global' + properties: { + actions: [] + criteria: { + allOf: [ + { + criterionType: 'StaticThresholdCriterion' + dimensions: [ + { + name: 'controllerName' + operator: 'Include' + values: [ + '*' + ] + } + { + name: 'kubernetes namespace' + operator: 'Include' + values: [ + '*' + ] + } + ] + metricName: 'PodReadyPercentage' + metricNamespace: 'Insights.Container/pods' + name: 'Metric1' + operator: 'LessThan' + threshold: 80 + timeAggregation: 'Average' + skipMetricValidation: true + } + ] + 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' + } + description: 'This alert monitors for excessive pods not in the ready state.' 
+ enabled: true + evaluationFrequency: 'PT1M' + scopes: [ + aks.id + ] + severity: 3 + targetResourceType: 'microsoft.containerservice/managedclusters' + windowSize: 'PT5M' + } + dependsOn: [ + aks + containerInsights + ] +} + +resource Restarting_container_count_for_clusterName_CI_7 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: 'Restarting container count for ${aksClusterName} CI-7' + location: 'global' + properties: { + actions: [] + criteria: { + allOf: [ + { + criterionType: 'StaticThresholdCriterion' + dimensions: [ + { + name: 'kubernetes namespace' + operator: 'Include' + values: [ + '*' + ] + } + { + name: 'controllerName' + operator: 'Include' + values: [ + '*' + ] + } + ] + metricName: 'restartingContainerCount' + metricNamespace: 'Insights.Container/pods' + name: 'Metric1' + operator: 'GreaterThan' + threshold: 0 + timeAggregation: 'Average' + skipMetricValidation: true + } + ] + 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' + } + description: 'This alert monitors number of containers restarting across the cluster.' 
+ enabled: true + evaluationFrequency: 'PT1M' + scopes: [ + aks.id + ] + severity: 3 + targetResourceType: 'Microsoft.ContainerService/managedClusters' + windowSize: 'PT1M' + } + dependsOn: [ + aks + containerInsights + ] +} + +resource AllAzureAdvisorAlert 'microsoft.insights/activityLogAlerts@2017-04-01' = { + name: 'AllAzureAdvisorAlert' + location: 'Global' + properties: { + scopes: [ + resourceGroup().id + ] + condition: { + allOf: [ + { + field: 'category' + equals: 'Recommendation' + } + { + field: 'operationName' + equals: 'Microsoft.Advisor/recommendations/available/action' + } + ] + } + actions: { + actionGroups: [] + } + enabled: true + description: 'All azure advisor alerts' + } +} + +resource logAnalyticsWorkspaceName_AllPrometheus 'Microsoft.OperationalInsights/workspaces/savedSearches@2020-08-01' = { + name: '${logAnalyticsWorkspaceName}/AllPrometheus' + properties: { + eTag: '*' + category: 'Prometheus' + displayName: 'All collected Prometheus information' + query: 'InsightsMetrics | where Namespace == "prometheus"' + version: 1 + } +} + +resource logAnalyticsWorkspaceName_NodeRebootRequested 'Microsoft.OperationalInsights/workspaces/savedSearches@2020-08-01' = { + name: '${logAnalyticsWorkspaceName}/NodeRebootRequested' + properties: { + eTag: '*' + category: 'Prometheus' + displayName: 'Nodes reboot required by kured' + query: 'InsightsMetrics | where Namespace == "prometheus" and Name == "kured_reboot_required" | where Val > 0' + version: 1 + } +} diff --git a/acr-aks-stamp/modules/vnet.bicep b/acr-aks-stamp/modules/vnet.bicep new file mode 100644 index 00000000..f9f706cf --- /dev/null +++ b/acr-aks-stamp/modules/vnet.bicep @@ -0,0 +1,42 @@ +param aksControlPlanePrincipalId string +param vnetName string +param aksSubnetName string +param aksIngressSubnetName string + +var networkContributorRole = '${subscription().id}/providers/Microsoft.Authorization/roleDefinitions/4d97b98b-1d4f-4787-a291-c67834d212e7' + +resource vnet 
'Microsoft.Network/virtualNetworks@2021-03-01' existing = { + name: vnetName +} + +resource aksSubnet 'Microsoft.Network/virtualNetworks/subnets@2021-03-01' existing = { + name: aksSubnetName + parent: vnet +} + +resource aksIngressSubnet 'Microsoft.Network/virtualNetworks/subnets@2021-03-01' existing = { + name: aksIngressSubnetName + parent: vnet +} + +// Allows cluster identity to join the nodepool vmss resources to this subnet. +resource aksNodeSubnetContributorRoleAssignment 'Microsoft.Authorization/roleAssignments@2020-08-01-preview' = { + name: guid('${aksControlPlanePrincipalId}-${aksSubnet.id}-subnet-roleassignment') + scope: aksSubnet + properties: { + principalId: aksControlPlanePrincipalId + roleDefinitionId: networkContributorRole + principalType: 'ServicePrincipal' + } +} + +// Allows cluster identity to join load balancers (ingress resources) to this subnet. +resource aksIngressSubnetContributorRoleAssignment 'Microsoft.Authorization/roleAssignments@2020-08-01-preview' = { + name: guid('${aksControlPlanePrincipalId}-${aksIngressSubnet.id}-subnet-roleassignment') + scope: aksIngressSubnet + properties: { + principalId: aksControlPlanePrincipalId + roleDefinitionId: networkContributorRole + principalType: 'ServicePrincipal' + } +} diff --git a/acr-aks-stamp/template.bicep b/acr-aks-stamp/template.bicep new file mode 100644 index 00000000..8de2b66a --- /dev/null +++ b/acr-aks-stamp/template.bicep @@ -0,0 +1,190 @@ +// Azure Regions +param location string = 'westeurope' +param geoRedundancyLocation string = 'northeurope' + +// Unique Strings and Tags +param teamIdentitfier string = 'bu0001' +param appIdentitfier string = 'a0008' + +// Resource Names +param acrName string = 'acr${teamIdentitfier}${appIdentitfier}' +param logAnalyticsWorkspaceName string = 'la-${teamIdentitfier}-${appIdentitfier}' +param aksClusterName string = 'aks-${teamIdentitfier}-${appIdentitfier}' +param keyVaultName string = 'kv-${teamIdentitfier}-${appIdentitfier}' +param appGWName 
string = 'appgw-${teamIdentitfier}-${appIdentitfier}' + +// Additional Resource Groups +param vnetGroupName string = 'rg-Networking' +param aksNodeResourceGroup string = 'rg-${aksClusterName}-Nodes' + +// Network +param vnetName string = 'vnet-spoke-${teamIdentitfier}${appIdentitfier}-00' +param appGWSubnetName string = 'snet-applicationgateway' +param privateLinkSubnetName string = 'snet-privatelink' +param aksSubnetName string = 'snet-clusternodes' +param aksIngressSubnetName string = 'snet-clusteringressservices' +param aksIngressLoadBalancerIp string = '10.240.4.4' +param aksAuthorizedIPRanges string = '0.0.0.0/0' + +// DNS +param domainName string = 'contoso.com' +param aksIngressDomainName string = 'aks-ingress.${domainName}' +param aksBackendSubDomainName string = appIdentitfier +param appGWHostName string = 'bicycle.${domainName}' + +// Identities +param aksControlPlaneIdentityName string = 'mi-${aksClusterName}-controlplane' +param appGWIdentityName string = 'mi-appgateway-frontend' +param aksIngressIdentityName string = 'podmi-ingress-controller' +param useAzureRBAC bool = true +param clusterAdminAadGroupObjectId string +param clusterUserAadGroupObjectId string + +// Identifier and Secrets +@secure() +param appGWListenerCertificateBase64 string // base64EncodedPfx (includes the private key, so it must not be logged) +param aksIngressCertificateBase64 string // base64EncodedCer + +// Flux GitOps +param fluxConfig object = { + RepositoryUrl: 'https://github.com/mspnp/aks-baseline' + RepositoryBranch: 'main' + RepositorySubfolder: './cluster-manifests' +} + +resource vnetGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = { + name: vnetGroupName + scope: subscription() +} + +resource vnet 'Microsoft.Network/virtualNetworks@2021-03-01' existing = { + name: vnetName + scope: vnetGroup +} + +resource aksSubnet 'Microsoft.Network/virtualNetworks/subnets@2021-03-01' existing = { + name: aksSubnetName + parent: vnet +} + +resource appGWSubnet 'Microsoft.Network/virtualNetworks/subnets@2021-03-01' existing = { + 
name: appGWSubnetName + parent: vnet +} + +resource aksIngressSubnet 'Microsoft.Network/virtualNetworks/subnets@2021-03-01' existing = { + name: aksIngressSubnetName + parent: vnet +} + +resource privateLinkSubnet 'Microsoft.Network/virtualNetworks/subnets@2021-03-01' existing = { + name: privateLinkSubnetName + parent: vnet +} + +resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2021-06-01' = { + name: logAnalyticsWorkspaceName + location: location +} + +module acrModule 'modules/acr.bicep' = { + name: 'acrStamp' + params: { + acrName: acrName + geoRedundancyLocation: geoRedundancyLocation + location: location + logAnalyticsWorkspaceName: logAnalyticsWorkspaceName + privateLinkSubnetId: privateLinkSubnet.id + vnetId: vnet.id + } +} + +module managedIdentitiesModule 'modules/managedIdentities.bicep' = { + name: 'managedIdentities' + params: { + aksControlPlaneIdentityName: aksControlPlaneIdentityName + appGWIdentityName: appGWIdentityName + aksIngressIdentityName: aksIngressIdentityName + location: location + } +} + +module keyVaultModule 'modules/keyvault.bicep' = { + name: 'keyVaultStamp' + params: { + location: location + appGWListenerCertificate: appGWListenerCertificateBase64 + aksIngressCertificate: aksIngressCertificateBase64 + keyVaultName: keyVaultName + aksIngressIdentityPrincipalId: managedIdentitiesModule.outputs.aksIngressIdentityPrincipalId + appGWIdentityPrincipalId: managedIdentitiesModule.outputs.appGWIdentityPrincipalId + logAnalyticsWorkspaceName: logAnalyticsWorkspaceName + privateLinkSubnetId: privateLinkSubnet.id + vnetId: vnet.id + } +} + +module aksVnetContributorRoleAssignmentModule 'modules/vnet.bicep' = { + name: 'EnsureClusterIdentityHasRbacToSelfManagedResources' + params: { + aksControlPlanePrincipalId: managedIdentitiesModule.outputs.aksControlPlaneIdentityPrincipalId + aksSubnetName: aksSubnetName + aksIngressSubnetName: aksIngressSubnetName + vnetName: vnetName + } + scope: vnetGroup +} + +module aksModule 
'modules/aks.bicep' = { + name: 'aks' + params: { + acrName: acrName + aksClusterName: aksClusterName + aksControlPlaneIdentityName: aksControlPlaneIdentityName + aksNodeResourceGroup: aksNodeResourceGroup + aksSubnetId: aksSubnet.id + location: location + logAnalyticsWorkspaceName: logAnalyticsWorkspaceName + aksIngressDomainName: aksIngressDomainName + aksIngressIdentityName: aksIngressIdentityName + aksIngressLoadBalancerIp: aksIngressLoadBalancerIp + aksAuthorizedIPRanges: aksAuthorizedIPRanges + appSubDomainName: aksBackendSubDomainName + vnetId: vnet.id + useAzureRBAC: useAzureRBAC + clusterAdminAadGroupObjectId: clusterAdminAadGroupObjectId + clusterUserAadGroupObjectId: clusterUserAadGroupObjectId + applicationIdentifierTag: appIdentitfier + businessUnitTag: teamIdentitfier + fluxSettings: fluxConfig + } +} + +module appGWModule 'modules/appgw.bicep' = { + name: 'appGW' + params: { + aksBackendDomainName: '${aksBackendSubDomainName}.${aksIngressDomainName}' + appGWHostName: appGWHostName + appGWIdentityName: appGWIdentityName + appGWListenerCertificateSecretId: keyVaultModule.outputs.appGWListenerCertificateSecretId + aksIngressCertificateSecretId: keyVaultModule.outputs.aksIngressCertificateSecretId + appGWName: appGWName + appGWSubnetId: appGWSubnet.id + location: location + logAnalyticsWorkspaceName: logAnalyticsWorkspaceName + trustedRootCertificatesRequired: !empty(aksIngressCertificateBase64) + } +} + +module monitoringModule 'modules/monitoring.bicep' = { + name: 'monitoring' + params: { + aksClusterName: aksClusterName + location: location + logAnalyticsWorkspaceName: logAnalyticsWorkspaceName + } + dependsOn: [ + aksModule + logAnalytics + ] +} diff --git a/acr-aks-stamp/template.json b/acr-aks-stamp/template.json new file mode 100644 index 00000000..4cf18223 --- /dev/null +++ b/acr-aks-stamp/template.json @@ -0,0 +1,2742 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": 
"1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.4.1272.37030", + "templateHash": "10716463035553745571" + } + }, + "parameters": { + "location": { + "type": "string", + "defaultValue": "westeurope" + }, + "geoRedundancyLocation": { + "type": "string", + "defaultValue": "northeurope" + }, + "teamIdentitfier": { + "type": "string", + "defaultValue": "bu0001" + }, + "appIdentitfier": { + "type": "string", + "defaultValue": "a0008" + }, + "acrName": { + "type": "string", + "defaultValue": "[format('acr{0}{1}', parameters('teamIdentitfier'), parameters('appIdentitfier'))]" + }, + "logAnalyticsWorkspaceName": { + "type": "string", + "defaultValue": "[format('la-{0}-{1}', parameters('teamIdentitfier'), parameters('appIdentitfier'))]" + }, + "aksClusterName": { + "type": "string", + "defaultValue": "[format('aks-{0}-{1}', parameters('teamIdentitfier'), parameters('appIdentitfier'))]" + }, + "keyVaultName": { + "type": "string", + "defaultValue": "[format('kv-{0}-{1}', parameters('teamIdentitfier'), parameters('appIdentitfier'))]" + }, + "appGWName": { + "type": "string", + "defaultValue": "[format('appgw-{0}-{1}', parameters('teamIdentitfier'), parameters('appIdentitfier'))]" + }, + "vnetGroupName": { + "type": "string", + "defaultValue": "rg-Networking" + }, + "aksNodeResourceGroup": { + "type": "string", + "defaultValue": "[format('rg-{0}-Nodes', parameters('aksClusterName'))]" + }, + "vnetName": { + "type": "string", + "defaultValue": "[format('vnet-spoke-{0}{1}-00', parameters('teamIdentitfier'), parameters('appIdentitfier'))]" + }, + "appGWSubnetName": { + "type": "string", + "defaultValue": "snet-applicationgateway" + }, + "privateLinkSubnetName": { + "type": "string", + "defaultValue": "snet-privatelink" + }, + "aksSubnetName": { + "type": "string", + "defaultValue": "snet-clusternodes" + }, + "aksIngressSubnetName": { + "type": "string", + "defaultValue": "snet-clusteringressservices" + }, + "aksIngressLoadBalancerIp": { + "type": 
"string", + "defaultValue": "10.240.4.4" + }, + "aksAuthorizedIPRanges": { + "type": "string", + "defaultValue": "0.0.0.0/0" + }, + "domainName": { + "type": "string", + "defaultValue": "contoso.com" + }, + "aksIngressDomainName": { + "type": "string", + "defaultValue": "[format('aks-ingress.{0}', parameters('domainName'))]" + }, + "aksBackendSubDomainName": { + "type": "string", + "defaultValue": "[parameters('appIdentitfier')]" + }, + "appGWHostName": { + "type": "string", + "defaultValue": "[format('bicycle.{0}', parameters('domainName'))]" + }, + "aksControlPlaneIdentityName": { + "type": "string", + "defaultValue": "[format('mi-{0}-controlplane', parameters('aksClusterName'))]" + }, + "appGWIdentityName": { + "type": "string", + "defaultValue": "mi-appgateway-frontend" + }, + "aksIngressIdentityName": { + "type": "string", + "defaultValue": "podmi-ingress-controller" + }, + "useAzureRBAC": { + "type": "bool", + "defaultValue": true + }, + "clusterAdminAadGroupObjectId": { + "type": "string" + }, + "clusterUserAadGroupObjectId": { + "type": "string" + }, + "appGWListenerCertificateBase64": { + "type": "string" + }, + "aksIngressCertificateBase64": { + "type": "string" + }, + "fluxConfig": { + "type": "object", + "defaultValue": { + "RepositoryUrl": "https://github.com/mspnp/aks-baseline", + "RepositoryBranch": "main", + "RepositorySubfolder": "./cluster-manifests" + } + } + }, + "resources": [ + { + "type": "Microsoft.OperationalInsights/workspaces", + "apiVersion": "2021-06-01", + "name": "[parameters('logAnalyticsWorkspaceName')]", + "location": "[parameters('location')]" + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "2020-10-01", + "name": "acrStamp", + "properties": { + "expressionEvaluationOptions": { + "scope": "inner" + }, + "mode": "Incremental", + "parameters": { + "acrName": { + "value": "[parameters('acrName')]" + }, + "geoRedundancyLocation": { + "value": "[parameters('geoRedundancyLocation')]" + }, + "location": { + "value": 
"[parameters('location')]" + }, + "logAnalyticsWorkspaceName": { + "value": "[parameters('logAnalyticsWorkspaceName')]" + }, + "privateLinkSubnetId": { + "value": "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, parameters('vnetGroupName')), 'Microsoft.Network/virtualNetworks/subnets', parameters('vnetName'), parameters('privateLinkSubnetName'))]" + }, + "vnetId": { + "value": "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, parameters('vnetGroupName')), 'Microsoft.Network/virtualNetworks', parameters('vnetName'))]" + } + }, + "template": { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.4.1272.37030", + "templateHash": "17506562387358908869" + } + }, + "parameters": { + "vnetId": { + "type": "string" + }, + "privateLinkSubnetId": { + "type": "string" + }, + "location": { + "type": "string" + }, + "geoRedundancyLocation": { + "type": "string" + }, + "acrName": { + "type": "string" + }, + "logAnalyticsWorkspaceName": { + "type": "string" + } + }, + "variables": { + "acrPrivateDnsZoneName": "privatelink.azurecr.io" + }, + "resources": [ + { + "type": "Microsoft.Network/privateDnsZones", + "apiVersion": "2020-06-01", + "name": "[variables('acrPrivateDnsZoneName')]", + "location": "global", + "properties": {} + }, + { + "type": "Microsoft.Network/privateDnsZones/virtualNetworkLinks", + "apiVersion": "2020-06-01", + "name": "[format('{0}/{1}', variables('acrPrivateDnsZoneName'), 'to_aksvnet')]", + "location": "global", + "properties": { + "virtualNetwork": { + "id": "[parameters('vnetId')]" + }, + "registrationEnabled": false + }, + "dependsOn": [ + "[resourceId('Microsoft.Network/privateDnsZones', variables('acrPrivateDnsZoneName'))]" + ] + }, + { + "type": "Microsoft.ContainerRegistry/registries", + "apiVersion": 
"2020-11-01-preview", + "name": "[parameters('acrName')]", + "location": "[parameters('location')]", + "sku": { + "name": "Premium" + }, + "properties": { + "adminUserEnabled": false, + "networkRuleSet": { + "defaultAction": "Deny", + "virtualNetworkRules": [], + "ipRules": [] + }, + "policies": { + "quarantinePolicy": { + "status": "disabled" + }, + "trustPolicy": { + "type": "Notary", + "status": "disabled" + }, + "retentionPolicy": { + "days": 15, + "status": "enabled" + } + }, + "publicNetworkAccess": "Disabled", + "encryption": { + "status": "disabled" + }, + "dataEndpointEnabled": true, + "networkRuleBypassOptions": "AzureServices", + "zoneRedundancy": "Disabled" + } + }, + { + "type": "Microsoft.ContainerRegistry/registries/replications", + "apiVersion": "2020-11-01-preview", + "name": "[format('{0}/{1}', parameters('acrName'), parameters('geoRedundancyLocation'))]", + "location": "[parameters('geoRedundancyLocation')]", + "dependsOn": [ + "[resourceId('Microsoft.ContainerRegistry/registries', parameters('acrName'))]" + ] + }, + { + "type": "Microsoft.Insights/diagnosticSettings", + "apiVersion": "2021-05-01-preview", + "scope": "[format('Microsoft.ContainerRegistry/registries/{0}', parameters('acrName'))]", + "name": "Microsoft.Insights", + "properties": { + "workspaceId": "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceName'))]", + "metrics": [ + { + "timeGrain": "PT1M", + "category": "AllMetrics", + "enabled": true + } + ], + "logs": [ + { + "category": "ContainerRegistryRepositoryEvents", + "enabled": true + }, + { + "category": "ContainerRegistryLoginEvents", + "enabled": true + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerRegistry/registries', parameters('acrName'))]" + ] + }, + { + "type": "Microsoft.Network/privateEndpoints", + "apiVersion": "2020-11-01", + "name": "acr_to_aksvnet", + "location": "[parameters('location')]", + "properties": { + "subnet": { + "id": 
"[parameters('privateLinkSubnetId')]" + }, + "privateLinkServiceConnections": [ + { + "name": "nodepools", + "properties": { + "privateLinkServiceId": "[resourceId('Microsoft.ContainerRegistry/registries', parameters('acrName'))]", + "groupIds": [ + "registry" + ] + } + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerRegistry/registries', parameters('acrName'))]", + "[resourceId('Microsoft.ContainerRegistry/registries/replications', parameters('acrName'), parameters('geoRedundancyLocation'))]" + ] + }, + { + "type": "Microsoft.Network/privateEndpoints/privateDnsZoneGroups", + "apiVersion": "2020-11-01", + "name": "[format('{0}/{1}', 'acr_to_aksvnet', 'default')]", + "properties": { + "privateDnsZoneConfigs": [ + { + "name": "privatelink-azurecr-io", + "properties": { + "privateDnsZoneId": "[resourceId('Microsoft.Network/privateDnsZones', variables('acrPrivateDnsZoneName'))]" + } + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.Network/privateDnsZones', variables('acrPrivateDnsZoneName'))]", + "[resourceId('Microsoft.Network/privateEndpoints', 'acr_to_aksvnet')]" + ] + } + ] + } + } + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "2020-10-01", + "name": "managedIdentities", + "properties": { + "expressionEvaluationOptions": { + "scope": "inner" + }, + "mode": "Incremental", + "parameters": { + "aksControlPlaneIdentityName": { + "value": "[parameters('aksControlPlaneIdentityName')]" + }, + "appGWIdentityName": { + "value": "[parameters('appGWIdentityName')]" + }, + "aksIngressIdentityName": { + "value": "[parameters('aksIngressIdentityName')]" + }, + "location": { + "value": "[parameters('location')]" + } + }, + "template": { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.4.1272.37030", + "templateHash": "10829328124836994140" + } + }, + "parameters": { + 
"aksControlPlaneIdentityName": { + "type": "string" + }, + "appGWIdentityName": { + "type": "string" + }, + "aksIngressIdentityName": { + "type": "string" + }, + "location": { + "type": "string" + } + }, + "resources": [ + { + "type": "Microsoft.ManagedIdentity/userAssignedIdentities", + "apiVersion": "2018-11-30", + "name": "[parameters('aksControlPlaneIdentityName')]", + "location": "[parameters('location')]" + }, + { + "type": "Microsoft.ManagedIdentity/userAssignedIdentities", + "apiVersion": "2018-11-30", + "name": "[parameters('appGWIdentityName')]", + "location": "[parameters('location')]" + }, + { + "type": "Microsoft.ManagedIdentity/userAssignedIdentities", + "apiVersion": "2018-11-30", + "name": "[parameters('aksIngressIdentityName')]", + "location": "[parameters('location')]" + } + ], + "outputs": { + "aksIngressIdentityPrincipalId": { + "type": "string", + "value": "[reference(resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', parameters('aksIngressIdentityName'))).principalId]" + }, + "appGWIdentityPrincipalId": { + "type": "string", + "value": "[reference(resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', parameters('appGWIdentityName'))).principalId]" + }, + "aksControlPlaneIdentityPrincipalId": { + "type": "string", + "value": "[reference(resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', parameters('aksControlPlaneIdentityName'))).principalId]" + } + } + } + } + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "2020-10-01", + "name": "keyVaultStamp", + "properties": { + "expressionEvaluationOptions": { + "scope": "inner" + }, + "mode": "Incremental", + "parameters": { + "location": { + "value": "[parameters('location')]" + }, + "appGWListenerCertificate": { + "value": "[parameters('appGWListenerCertificateBase64')]" + }, + "aksIngressCertificate": { + "value": "[parameters('aksIngressCertificateBase64')]" + }, + "keyVaultName": { + "value": "[parameters('keyVaultName')]" + }, + 
"aksIngressIdentityPrincipalId": { + "value": "[reference(resourceId('Microsoft.Resources/deployments', 'managedIdentities')).outputs.aksIngressIdentityPrincipalId.value]" + }, + "appGWIdentityPrincipalId": { + "value": "[reference(resourceId('Microsoft.Resources/deployments', 'managedIdentities')).outputs.appGWIdentityPrincipalId.value]" + }, + "logAnalyticsWorkspaceName": { + "value": "[parameters('logAnalyticsWorkspaceName')]" + }, + "privateLinkSubnetId": { + "value": "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, parameters('vnetGroupName')), 'Microsoft.Network/virtualNetworks/subnets', parameters('vnetName'), parameters('privateLinkSubnetName'))]" + }, + "vnetId": { + "value": "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, parameters('vnetGroupName')), 'Microsoft.Network/virtualNetworks', parameters('vnetName'))]" + } + }, + "template": { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.4.1272.37030", + "templateHash": "10444007696249941006" + } + }, + "parameters": { + "vnetId": { + "type": "string" + }, + "privateLinkSubnetId": { + "type": "string" + }, + "keyVaultName": { + "type": "string" + }, + "location": { + "type": "string" + }, + "appGWListenerCertificate": { + "type": "string" + }, + "aksIngressCertificate": { + "type": "string" + }, + "appGWIdentityPrincipalId": { + "type": "string" + }, + "aksIngressIdentityPrincipalId": { + "type": "string" + }, + "logAnalyticsWorkspaceName": { + "type": "string" + } + }, + "variables": { + "keyVaultSecretsUserRole": "[format('{0}/providers/Microsoft.Authorization/roleDefinitions/4633458b-17de-408a-b874-0445c86b69e6', subscription().id)]", + "keyVaultUserRole": "[format('{0}/providers/Microsoft.Authorization/roleDefinitions/21090545-7ca7-4776-b22c-e363652d74d2', 
subscription().id)]", + "keyVaultPrivateDnsZoneName": "privatelink.vaultcore.azure.net" + }, + "resources": [ + { + "type": "Microsoft.KeyVault/vaults/secrets", + "apiVersion": "2021-06-01-preview", + "name": "[format('{0}/{1}', parameters('keyVaultName'), 'gateway-ssl-cert')]", + "properties": { + "value": "[parameters('appGWListenerCertificate')]" + }, + "dependsOn": [ + "[resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))]" + ] + }, + { + "condition": "[not(empty(parameters('aksIngressCertificate')))]", + "type": "Microsoft.KeyVault/vaults/secrets", + "apiVersion": "2021-06-01-preview", + "name": "[format('{0}/{1}', parameters('keyVaultName'), 'appgw-aks-ingress-tls-cert')]", + "properties": { + "value": "[parameters('aksIngressCertificate')]" + }, + "dependsOn": [ + "[resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))]" + ] + }, + { + "type": "Microsoft.KeyVault/vaults", + "apiVersion": "2021-06-01-preview", + "name": "[parameters('keyVaultName')]", + "location": "[parameters('location')]", + "properties": { + "accessPolicies": [], + "sku": { + "family": "A", + "name": "standard" + }, + "tenantId": "[tenant().tenantId]", + "networkAcls": { + "bypass": "AzureServices", + "defaultAction": "Deny", + "ipRules": [], + "virtualNetworkRules": [] + }, + "enableRbacAuthorization": true, + "enabledForDeployment": false, + "enabledForDiskEncryption": false, + "enabledForTemplateDeployment": false, + "enableSoftDelete": true + } + }, + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-08-01-preview", + "scope": "[format('Microsoft.KeyVault/vaults/{0}', parameters('keyVaultName'))]", + "name": "[guid(format('{0}-{1}-keyvault-secrets-roleassignment', parameters('appGWIdentityPrincipalId'), resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))))]", + "properties": { + "principalId": "[parameters('appGWIdentityPrincipalId')]", + "roleDefinitionId": "[variables('keyVaultSecretsUserRole')]", + 
"principalType": "ServicePrincipal" + }, + "dependsOn": [ + "[resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))]" + ] + }, + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-08-01-preview", + "scope": "[format('Microsoft.KeyVault/vaults/{0}', parameters('keyVaultName'))]", + "name": "[guid(format('{0}-{1}-keyvault-roleassignment', parameters('appGWIdentityPrincipalId'), resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))))]", + "properties": { + "principalId": "[parameters('appGWIdentityPrincipalId')]", + "roleDefinitionId": "[variables('keyVaultUserRole')]", + "principalType": "ServicePrincipal" + }, + "dependsOn": [ + "[resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))]" + ] + }, + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-08-01-preview", + "scope": "[format('Microsoft.KeyVault/vaults/{0}', parameters('keyVaultName'))]", + "name": "[guid(format('{0}-{1}-keyvault-secrets-roleassignment', parameters('aksIngressIdentityPrincipalId'), resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))))]", + "properties": { + "principalId": "[parameters('aksIngressIdentityPrincipalId')]", + "roleDefinitionId": "[variables('keyVaultSecretsUserRole')]", + "principalType": "ServicePrincipal" + }, + "dependsOn": [ + "[resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))]" + ] + }, + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-08-01-preview", + "scope": "[format('Microsoft.KeyVault/vaults/{0}', parameters('keyVaultName'))]", + "name": "[guid(format('{0}-{1}-keyvault-roleassignment', parameters('aksIngressIdentityPrincipalId'), resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))))]", + "properties": { + "principalId": "[parameters('aksIngressIdentityPrincipalId')]", + "roleDefinitionId": "[variables('keyVaultUserRole')]", + "principalType": "ServicePrincipal" + }, + "dependsOn": [ + 
"[resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))]" + ] + }, + { + "type": "Microsoft.Insights/diagnosticSettings", + "apiVersion": "2021-05-01-preview", + "scope": "[format('Microsoft.KeyVault/vaults/{0}', parameters('keyVaultName'))]", + "name": "Microsoft.Insights", + "properties": { + "workspaceId": "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceName'))]", + "logs": [ + { + "category": "AuditEvent", + "enabled": true + } + ], + "metrics": [ + { + "category": "AllMetrics", + "enabled": true + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))]" + ] + }, + { + "type": "Microsoft.Network/privateEndpoints", + "apiVersion": "2020-05-01", + "name": "akv-to-aksvnet", + "location": "[parameters('location')]", + "properties": { + "subnet": { + "id": "[parameters('privateLinkSubnetId')]" + }, + "privateLinkServiceConnections": [ + { + "name": "nodepools", + "properties": { + "privateLinkServiceId": "[resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))]", + "groupIds": [ + "vault" + ] + } + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))]" + ] + }, + { + "type": "Microsoft.Network/privateEndpoints/privateDnsZoneGroups", + "apiVersion": "2020-05-01", + "name": "[format('{0}/{1}', 'akv-to-aksvnet', 'default')]", + "properties": { + "privateDnsZoneConfigs": [ + { + "name": "privatelink-akv-net", + "properties": { + "privateDnsZoneId": "[resourceId('Microsoft.Network/privateDnsZones', variables('keyVaultPrivateDnsZoneName'))]" + } + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.Network/privateDnsZones', variables('keyVaultPrivateDnsZoneName'))]", + "[resourceId('Microsoft.Network/privateEndpoints', 'akv-to-aksvnet')]" + ] + }, + { + "type": "Microsoft.Network/privateDnsZones", + "apiVersion": "2018-09-01", + "name": "[variables('keyVaultPrivateDnsZoneName')]", + "location": "global", + 
"properties": {} + }, + { + "type": "Microsoft.Network/privateDnsZones/virtualNetworkLinks", + "apiVersion": "2020-06-01", + "name": "[format('{0}/{1}', variables('keyVaultPrivateDnsZoneName'), 'to_aksvnet')]", + "location": "global", + "properties": { + "virtualNetwork": { + "id": "[parameters('vnetId')]" + }, + "registrationEnabled": false + }, + "dependsOn": [ + "[resourceId('Microsoft.Network/privateDnsZones', variables('keyVaultPrivateDnsZoneName'))]" + ] + }, + { + "type": "Microsoft.OperationsManagement/solutions", + "apiVersion": "2015-11-01-preview", + "name": "[format('KeyVaultAnalytics({0})', parameters('logAnalyticsWorkspaceName'))]", + "location": "[parameters('location')]", + "properties": { + "workspaceResourceId": "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceName'))]" + }, + "plan": { + "name": "[format('KeyVaultAnalytics({0})', parameters('logAnalyticsWorkspaceName'))]", + "product": "OMSGallery/KeyVaultAnalytics", + "promotionCode": "", + "publisher": "Microsoft" + } + } + ], + "outputs": { + "appGWListenerCertificateSecretId": { + "type": "string", + "value": "[reference(resourceId('Microsoft.KeyVault/vaults/secrets', parameters('keyVaultName'), 'gateway-ssl-cert')).secretUri]" + }, + "aksIngressCertificateSecretId": { + "type": "string", + "value": "[if(not(empty(parameters('aksIngressCertificate'))), reference(resourceId('Microsoft.KeyVault/vaults/secrets', parameters('keyVaultName'), 'appgw-aks-ingress-tls-cert')).secretUri, '')]" + } + } + } + }, + "dependsOn": [ + "[resourceId('Microsoft.Resources/deployments', 'managedIdentities')]" + ] + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "2020-10-01", + "name": "EnsureClusterIdentityHasRbacToSelfManagedResources", + "resourceGroup": "[parameters('vnetGroupName')]", + "properties": { + "expressionEvaluationOptions": { + "scope": "inner" + }, + "mode": "Incremental", + "parameters": { + "aksControlPlanePrincipalId": { + "value": 
"[reference(resourceId('Microsoft.Resources/deployments', 'managedIdentities')).outputs.aksControlPlaneIdentityPrincipalId.value]" + }, + "aksSubnetName": { + "value": "[parameters('aksSubnetName')]" + }, + "aksIngressSubnetName": { + "value": "[parameters('aksIngressSubnetName')]" + }, + "vnetName": { + "value": "[parameters('vnetName')]" + } + }, + "template": { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.4.1272.37030", + "templateHash": "5804087660004797214" + } + }, + "parameters": { + "aksControlPlanePrincipalId": { + "type": "string" + }, + "vnetName": { + "type": "string" + }, + "aksSubnetName": { + "type": "string" + }, + "aksIngressSubnetName": { + "type": "string" + } + }, + "variables": { + "networkContributorRole": "[format('{0}/providers/Microsoft.Authorization/roleDefinitions/4d97b98b-1d4f-4787-a291-c67834d212e7', subscription().id)]" + }, + "resources": [ + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-08-01-preview", + "scope": "[format('Microsoft.Network/virtualNetworks/{0}/subnets/{1}', parameters('vnetName'), parameters('aksSubnetName'))]", + "name": "[guid(format('{0}-{1}-subnet-roleassignment', parameters('aksControlPlanePrincipalId'), resourceId('Microsoft.Network/virtualNetworks/subnets', parameters('vnetName'), parameters('aksSubnetName'))))]", + "properties": { + "principalId": "[parameters('aksControlPlanePrincipalId')]", + "roleDefinitionId": "[variables('networkContributorRole')]", + "principalType": "ServicePrincipal" + } + }, + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-08-01-preview", + "scope": "[format('Microsoft.Network/virtualNetworks/{0}/subnets/{1}', parameters('vnetName'), parameters('aksIngressSubnetName'))]", + "name": "[guid(format('{0}-{1}-subnet-roleassignment', parameters('aksControlPlanePrincipalId'), 
resourceId('Microsoft.Network/virtualNetworks/subnets', parameters('vnetName'), parameters('aksIngressSubnetName'))))]", + "properties": { + "principalId": "[parameters('aksControlPlanePrincipalId')]", + "roleDefinitionId": "[variables('networkContributorRole')]", + "principalType": "ServicePrincipal" + } + } + ] + } + }, + "dependsOn": [ + "[resourceId('Microsoft.Resources/deployments', 'managedIdentities')]" + ] + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "2020-10-01", + "name": "aks", + "properties": { + "expressionEvaluationOptions": { + "scope": "inner" + }, + "mode": "Incremental", + "parameters": { + "acrName": { + "value": "[parameters('acrName')]" + }, + "aksClusterName": { + "value": "[parameters('aksClusterName')]" + }, + "aksControlPlaneIdentityName": { + "value": "[parameters('aksControlPlaneIdentityName')]" + }, + "aksNodeResourceGroup": { + "value": "[parameters('aksNodeResourceGroup')]" + }, + "aksSubnetId": { + "value": "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, parameters('vnetGroupName')), 'Microsoft.Network/virtualNetworks/subnets', parameters('vnetName'), parameters('aksSubnetName'))]" + }, + "location": { + "value": "[parameters('location')]" + }, + "logAnalyticsWorkspaceName": { + "value": "[parameters('logAnalyticsWorkspaceName')]" + }, + "aksIngressDomainName": { + "value": "[parameters('aksIngressDomainName')]" + }, + "aksIngressIdentityName": { + "value": "[parameters('aksIngressIdentityName')]" + }, + "aksIngressLoadBalancerIp": { + "value": "[parameters('aksIngressLoadBalancerIp')]" + }, + "aksAuthorizedIPRanges": { + "value": "[parameters('aksAuthorizedIPRanges')]" + }, + "appSubDomainName": { + "value": "[parameters('aksBackendSubDomainName')]" + }, + "vnetId": { + "value": "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, parameters('vnetGroupName')), 'Microsoft.Network/virtualNetworks', 
parameters('vnetName'))]" + }, + "useAzureRBAC": { + "value": "[parameters('useAzureRBAC')]" + }, + "clusterAdminAadGroupObjectId": { + "value": "[parameters('clusterAdminAadGroupObjectId')]" + }, + "clusterUserAadGroupObjectId": { + "value": "[parameters('clusterUserAadGroupObjectId')]" + }, + "applicationIdentifierTag": { + "value": "[parameters('appIdentitfier')]" + }, + "businessUnitTag": { + "value": "[parameters('teamIdentitfier')]" + }, + "fluxSettings": { + "value": "[parameters('fluxConfig')]" + } + }, + "template": { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.4.1272.37030", + "templateHash": "3617214316864246343" + } + }, + "parameters": { + "aksClusterName": { + "type": "string" + }, + "location": { + "type": "string" + }, + "aksControlPlaneIdentityName": { + "type": "string" + }, + "aksNodeResourceGroup": { + "type": "string" + }, + "aksIngressDomainName": { + "type": "string" + }, + "aksIngressIdentityName": { + "type": "string" + }, + "aksIngressLoadBalancerIp": { + "type": "string" + }, + "aksAuthorizedIPRanges": { + "type": "string" + }, + "appSubDomainName": { + "type": "string" + }, + "acrName": { + "type": "string" + }, + "vnetId": { + "type": "string" + }, + "aksSubnetId": { + "type": "string" + }, + "logAnalyticsWorkspaceName": { + "type": "string" + }, + "useAzureRBAC": { + "type": "bool" + }, + "clusterAdminAadGroupObjectId": { + "type": "string" + }, + "clusterUserAadGroupObjectId": { + "type": "string" + }, + "businessUnitTag": { + "type": "string" + }, + "applicationIdentifierTag": { + "type": "string" + }, + "fluxSettings": { + "type": "object" + } + }, + "variables": { + "monitoringMetricsPublisherRole": "[format('{0}/providers/Microsoft.Authorization/roleDefinitions/3913510d-42f4-4e42-8a64-420c390055eb', subscription().id)]", + "acrPullRole": 
"[format('{0}/providers/Microsoft.Authorization/roleDefinitions/7f951dda-4ed3-4680-a7ca-43fe172d538d', subscription().id)]", + "containerInsightsSolutionName": "[format('ContainerInsights({0})', parameters('logAnalyticsWorkspaceName'))]" + }, + "resources": [ + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-08-01-preview", + "scope": "[format('Microsoft.ContainerRegistry/registries/{0}', parameters('acrName'))]", + "name": "[guid(format('{0}-{1}-{2}', resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName')), parameters('acrName'), variables('acrPullRole')))]", + "properties": { + "principalId": "[reference(resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName')), '2020-12-01').identityProfile.kubeletidentity.objectId]", + "roleDefinitionId": "[variables('acrPullRole')]", + "principalType": "ServicePrincipal" + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ] + }, + { + "type": "Microsoft.ContainerService/managedClusters", + "apiVersion": "2021-08-01", + "name": "[parameters('aksClusterName')]", + "location": "[parameters('location')]", + "tags": { + "Business unit": "[parameters('businessUnitTag')]", + "Application identifier": "[parameters('applicationIdentifierTag')]" + }, + "identity": { + "type": "UserAssigned", + "userAssignedIdentities": { + "[format('{0}', resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', parameters('aksControlPlaneIdentityName')))]": {} + } + }, + "sku": { + "name": "Basic", + "tier": "Paid" + }, + "properties": { + "nodeResourceGroup": "[parameters('aksNodeResourceGroup')]", + "enableRBAC": true, + "enablePodSecurityPolicy": false, + "publicNetworkAccess": "Enabled", + "kubernetesVersion": "1.22.4", + "dnsPrefix": "[format('{0}-dns', parameters('aksClusterName'))]", + "agentPoolProfiles": [ + { + "name": "npsystem", + "count": 3, + "vmSize": "Standard_DS2_v2", + 
"osDiskSizeGB": 80, + "osDiskType": "Ephemeral", + "osType": "Linux", + "minCount": 3, + "maxCount": 4, + "vnetSubnetID": "[parameters('aksSubnetId')]", + "enableAutoScaling": true, + "type": "VirtualMachineScaleSets", + "mode": "System", + "scaleSetPriority": "Regular", + "scaleSetEvictionPolicy": "Delete", + "orchestratorVersion": "1.22.4", + "enableNodePublicIP": false, + "maxPods": 30, + "availabilityZones": [ + "1", + "2", + "3" + ], + "upgradeSettings": { + "maxSurge": "33%" + }, + "nodeTaints": [ + "CriticalAddonsOnly=true:NoSchedule" + ] + }, + { + "name": "npuser01", + "count": 3, + "vmSize": "Standard_DS3_v2", + "osDiskSizeGB": 120, + "osDiskType": "Ephemeral", + "osType": "Linux", + "minCount": 2, + "maxCount": 5, + "vnetSubnetID": "[parameters('aksSubnetId')]", + "enableAutoScaling": true, + "type": "VirtualMachineScaleSets", + "mode": "User", + "scaleSetPriority": "Regular", + "scaleSetEvictionPolicy": "Delete", + "orchestratorVersion": "1.22.4", + "enableNodePublicIP": false, + "maxPods": 30, + "availabilityZones": [ + "1", + "2", + "3" + ], + "upgradeSettings": { + "maxSurge": "33%" + } + } + ], + "servicePrincipalProfile": { + "clientId": "msi" + }, + "addonProfiles": { + "httpApplicationRouting": { + "enabled": false + }, + "omsagent": { + "enabled": true, + "config": { + "logAnalyticsWorkspaceResourceId": "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceName'))]" + } + }, + "aciConnectorLinux": { + "enabled": false + }, + "azurepolicy": { + "enabled": true, + "config": { + "version": "v2" + } + }, + "azureKeyvaultSecretsProvider": { + "enabled": true, + "config": { + "enableSecretRotation": "false" + } + } + }, + "networkProfile": { + "networkPolicy": "azure", + "networkPlugin": "azure", + "loadBalancerSku": "standard", + "outboundType": "loadBalancer", + "serviceCidr": "172.16.0.0/16", + "dnsServiceIP": "172.16.0.10", + "dockerBridgeCidr": "172.18.0.1/16" + }, + "aadProfile": { + "managed": true, + 
"enableAzureRBAC": "[parameters('useAzureRBAC')]", + "adminGroupObjectIDs": "[if(not(parameters('useAzureRBAC')), array(parameters('clusterAdminAadGroupObjectId')), createArray())]", + "tenantID": "[tenant().tenantId]" + }, + "autoScalerProfile": { + "balance-similar-node-groups": "false", + "expander": "random", + "max-empty-bulk-delete": "10", + "max-graceful-termination-sec": "600", + "max-node-provision-time": "15m", + "max-total-unready-percentage": "45", + "new-pod-scale-up-delay": "0s", + "ok-total-unready-count": "3", + "scale-down-delay-after-add": "10m", + "scale-down-delay-after-delete": "20s", + "scale-down-delay-after-failure": "3m", + "scale-down-unneeded-time": "10m", + "scale-down-unready-time": "20m", + "scale-down-utilization-threshold": "0.5", + "scan-interval": "10s", + "skip-nodes-with-local-storage": "true", + "skip-nodes-with-system-pods": "true" + }, + "apiServerAccessProfile": { + "authorizedIPRanges": [ + "[parameters('aksAuthorizedIPRanges')]" + ], + "enablePrivateCluster": false + }, + "podIdentityProfile": { + "enabled": false, + "userAssignedIdentities": [], + "userAssignedIdentityExceptions": [] + }, + "disableLocalAccounts": true, + "securityProfile": { + "azureDefender": { + "enabled": true, + "logAnalyticsWorkspaceResourceId": "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceName'))]" + } + }, + "oidcIssuerProfile": { + "enabled": true + } + }, + "dependsOn": [ + "[resourceId('Microsoft.Resources/deployments', 'aksPolicies')]" + ] + }, + { + "type": "Microsoft.Insights/diagnosticSettings", + "apiVersion": "2021-05-01-preview", + "scope": "[format('Microsoft.ContainerService/managedClusters/{0}', parameters('aksClusterName'))]", + "name": "Microsoft.Insights", + "properties": { + "workspaceId": "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceName'))]", + "logs": [ + { + "category": "cluster-autoscaler", + "enabled": true + }, + { + "category": 
"kube-controller-manager", + "enabled": true + }, + { + "category": "kube-audit-admin", + "enabled": true + }, + { + "category": "guard", + "enabled": true + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ] + }, + { + "type": "Microsoft.Network/privateDnsZones", + "apiVersion": "2018-09-01", + "name": "[parameters('aksIngressDomainName')]", + "location": "global", + "properties": {} + }, + { + "type": "Microsoft.Network/privateDnsZones/A", + "apiVersion": "2018-09-01", + "name": "[format('{0}/{1}', parameters('aksIngressDomainName'), parameters('appSubDomainName'))]", + "properties": { + "ttl": 3600, + "aRecords": [ + { + "ipv4Address": "[parameters('aksIngressLoadBalancerIp')]" + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.Network/privateDnsZones', parameters('aksIngressDomainName'))]" + ] + }, + { + "type": "Microsoft.Network/privateDnsZones/virtualNetworkLinks", + "apiVersion": "2020-06-01", + "name": "[format('{0}/{1}', parameters('aksIngressDomainName'), 'to_aksvnet')]", + "location": "global", + "properties": { + "virtualNetwork": { + "id": "[parameters('vnetId')]" + }, + "registrationEnabled": false + }, + "dependsOn": [ + "[resourceId('Microsoft.Network/privateDnsZones', parameters('aksIngressDomainName'))]" + ] + }, + { + "type": "Microsoft.KubernetesConfiguration/extensions", + "apiVersion": "2021-09-01", + "scope": "[format('Microsoft.ContainerService/managedClusters/{0}', parameters('aksClusterName'))]", + "name": "flux", + "properties": { + "extensionType": "microsoft.flux", + "autoUpgradeMinorVersion": true, + "releaseTrain": "Stable", + "scope": { + "cluster": { + "releaseNamespace": "flux-system" + } + }, + "configurationSettings": { + "helm-controller.enabled": "false", + "source-controller.enabled": "true", + "kustomize-controller.enabled": "true", + "notification-controller.enabled": "false", + "image-automation-controller.enabled": "false", + 
"image-reflector-controller.enabled": "false" + }, + "configurationProtectedSettings": {} + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]", + "[extensionResourceId(resourceId('Microsoft.ContainerRegistry/registries', parameters('acrName')), 'Microsoft.Authorization/roleAssignments', guid(format('{0}-{1}-{2}', resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName')), parameters('acrName'), variables('acrPullRole'))))]" + ] + }, + { + "type": "Microsoft.KubernetesConfiguration/fluxConfigurations", + "apiVersion": "2022-01-01-preview", + "scope": "[format('Microsoft.ContainerService/managedClusters/{0}', parameters('aksClusterName'))]", + "name": "bootstrap", + "properties": { + "scope": "cluster", + "namespace": "flux-system", + "sourceKind": "GitRepository", + "gitRepository": { + "url": "[parameters('fluxSettings').RepositoryUrl]", + "timeoutInSeconds": 180, + "syncIntervalInSeconds": 300, + "repositoryRef": { + "branch": "[parameters('fluxSettings').RepositoryBranch]", + "tag": null, + "semver": null, + "commit": null + }, + "sshKnownHosts": "", + "httpsUser": null, + "httpsCACert": null, + "localAuthRef": null + }, + "kustomizations": { + "unified": { + "path": "[parameters('fluxSettings').RepositorySubfolder]", + "timeoutInSeconds": 300, + "syncIntervalInSeconds": 300, + "retryIntervalInSeconds": null, + "prune": true, + "force": false + } + } + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]", + "[extensionResourceId(resourceId('Microsoft.ContainerRegistry/registries', parameters('acrName')), 'Microsoft.Authorization/roleAssignments', guid(format('{0}-{1}-{2}', resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName')), parameters('acrName'), variables('acrPullRole'))))]", + "[extensionResourceId(resourceId('Microsoft.ContainerService/managedClusters', 
parameters('aksClusterName')), 'Microsoft.KubernetesConfiguration/extensions', 'flux')]" + ] + }, + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-08-01-preview", + "scope": "[format('Microsoft.ContainerService/managedClusters/{0}', parameters('aksClusterName'))]", + "name": "[guid(format('{0}-omsagent-{1}', resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName')), variables('monitoringMetricsPublisherRole')))]", + "properties": { + "principalId": "[reference(resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName')), '2020-12-01').addonProfiles.omsagent.identity.objectId]", + "roleDefinitionId": "[variables('monitoringMetricsPublisherRole')]", + "principalType": "ServicePrincipal" + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ] + }, + { + "type": "Microsoft.OperationsManagement/solutions", + "apiVersion": "2015-11-01-preview", + "name": "[variables('containerInsightsSolutionName')]", + "location": "[parameters('location')]", + "properties": { + "workspaceResourceId": "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceName'))]" + }, + "plan": { + "name": "[variables('containerInsightsSolutionName')]", + "product": "OMSGallery/ContainerInsights", + "promotionCode": "", + "publisher": "Microsoft" + } + }, + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2018-04-16", + "name": "PodFailedScheduledQuery", + "location": "[parameters('location')]", + "properties": { + "description": "Alert on pod Failed phase.", + "enabled": "true", + "source": { + "query": "[format('//https://docs.microsoft.com/azure/azure-monitor/insights/container-insights-alerts \r\n let endDateTime = now(); let startDateTime = ago(1h); let trendBinSize = 1m; let clusterName = \"{0}\"; KubePodInventory | where TimeGenerated < endDateTime | where TimeGenerated >= startDateTime | where 
ClusterName == clusterName | distinct ClusterName, TimeGenerated | summarize ClusterSnapshotCount = count() by bin(TimeGenerated, trendBinSize), ClusterName | join hint.strategy=broadcast ( KubePodInventory | where TimeGenerated < endDateTime | where TimeGenerated >= startDateTime | distinct ClusterName, Computer, PodUid, TimeGenerated, PodStatus | summarize TotalCount = count(), PendingCount = sumif(1, PodStatus =~ \"Pending\"), RunningCount = sumif(1, PodStatus =~ \"Running\"), SucceededCount = sumif(1, PodStatus =~ \"Succeeded\"), FailedCount = sumif(1, PodStatus =~ \"Failed\") by ClusterName, bin(TimeGenerated, trendBinSize) ) on ClusterName, TimeGenerated | extend UnknownCount = TotalCount - PendingCount - RunningCount - SucceededCount - FailedCount | project TimeGenerated, TotalCount = todouble(TotalCount) / ClusterSnapshotCount, PendingCount = todouble(PendingCount) / ClusterSnapshotCount, RunningCount = todouble(RunningCount) / ClusterSnapshotCount, SucceededCount = todouble(SucceededCount) / ClusterSnapshotCount, FailedCount = todouble(FailedCount) / ClusterSnapshotCount, UnknownCount = todouble(UnknownCount) / ClusterSnapshotCount| summarize AggregatedValue = avg(FailedCount) by bin(TimeGenerated, trendBinSize)', parameters('aksClusterName'))]", + "dataSourceId": "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceName'))]", + "queryType": "ResultCount" + }, + "schedule": { + "frequencyInMinutes": 5, + "timeWindowInMinutes": 10 + }, + "action": { + "odata.type": "Microsoft.WindowsAzure.Management.Monitoring.Alerts.Models.Microsoft.AppInsights.Nexus.DataContracts.Resources.ScheduledQueryRules.AlertingAction", + "severity": "3", + "trigger": { + "thresholdOperator": "GreaterThan", + "threshold": 3, + "metricTrigger": { + "thresholdOperator": "GreaterThan", + "threshold": 2, + "metricTriggerType": "Consecutive" + } + } + } + }, + "dependsOn": [ + "[resourceId('Microsoft.OperationsManagement/solutions', 
variables('containerInsightsSolutionName'))]" + ] + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "2020-10-01", + "name": "aksNodeSettings", + "resourceGroup": "[parameters('aksNodeResourceGroup')]", + "properties": { + "expressionEvaluationOptions": { + "scope": "inner" + }, + "mode": "Incremental", + "parameters": { + "aksClusterKubeletIdentityPrincipalId": { + "value": "[reference(resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName')), '2020-03-01').identityProfile.kubeletidentity.objectId]" + } + }, + "template": { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.4.1272.37030", + "templateHash": "14995945467472764956" + } + }, + "parameters": { + "aksClusterKubeletIdentityPrincipalId": { + "type": "string" + } + }, + "variables": { + "virtualMachineContributorRole": "[format('{0}/providers/Microsoft.Authorization/roleDefinitions/9980e02c-c2be-4d73-94e8-173b1dc7cf3c', subscription().id)]" + }, + "resources": [ + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-04-01-preview", + "name": "[guid(resourceGroup().id)]", + "properties": { + "roleDefinitionId": "[variables('virtualMachineContributorRole')]", + "principalId": "[parameters('aksClusterKubeletIdentityPrincipalId')]", + "principalType": "ServicePrincipal" + } + } + ] + } + } + }, + { + "condition": "[parameters('useAzureRBAC')]", + "type": "Microsoft.Resources/deployments", + "apiVersion": "2020-10-01", + "name": "aksRBAC", + "properties": { + "expressionEvaluationOptions": { + "scope": "inner" + }, + "mode": "Incremental", + "parameters": { + "aksClusterName": { + "value": "[parameters('aksClusterName')]" + }, + "clusterAdminAadGroupObjectId": { + "value": "[parameters('clusterAdminAadGroupObjectId')]" + }, + "clusterUserAadGroupObjectId": { + "value": 
"[parameters('clusterUserAadGroupObjectId')]" + }, + "aksIngressIdentityName": { + "value": "[parameters('aksIngressIdentityName')]" + }, + "userNamespaceName": { + "value": "[parameters('applicationIdentifierTag')]" + } + }, + "template": { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.4.1272.37030", + "templateHash": "17192544908865129077" + } + }, + "parameters": { + "aksClusterName": { + "type": "string" + }, + "clusterUserAadGroupObjectId": { + "type": "string" + }, + "clusterAdminAadGroupObjectId": { + "type": "string" + }, + "aksIngressIdentityName": { + "type": "string" + }, + "userNamespaceName": { + "type": "string" + } + }, + "variables": { + "managedIdentityOperatorRole": "[format('{0}/providers/Microsoft.Authorization/roleDefinitions/f1a07417-d97a-45cb-824c-7a7467783830', subscription().id)]", + "clusterAdminRoleId": "b1ff04bb-8a4e-4dc4-8eb5-8693973ce19b", + "clusterReaderRoleId": "7f6c6a51-bcf8-42ba-9220-52d62157d7db", + "serviceClusterUserRoleId": "4abbcc35-e782-43d8-92c5-2d3f1bd2253f" + }, + "resources": [ + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-04-01-preview", + "scope": "[format('Microsoft.ContainerService/managedClusters/{0}', parameters('aksClusterName'))]", + "name": "[guid('aad-admin-group', resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName')), parameters('clusterAdminAadGroupObjectId'))]", + "properties": { + "roleDefinitionId": "[format('/subscriptions/{0}/providers/Microsoft.Authorization/roleDefinitions/{1}', subscription().subscriptionId, variables('clusterAdminRoleId'))]", + "description": "Members of this group are cluster admins of this cluster.", + "principalId": "[parameters('clusterAdminAadGroupObjectId')]", + "principalType": "Group" + } + }, + { + "type": "Microsoft.Authorization/roleAssignments", + 
"apiVersion": "2020-04-01-preview", + "scope": "[format('Microsoft.ContainerService/managedClusters/{0}', parameters('aksClusterName'))]", + "name": "[guid('aad-admin-group-sc', resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName')), parameters('clusterAdminAadGroupObjectId'))]", + "properties": { + "roleDefinitionId": "[format('/subscriptions/{0}/providers/Microsoft.Authorization/roleDefinitions/{1}', subscription().subscriptionId, variables('serviceClusterUserRoleId'))]", + "description": "Members of this group are cluster users of this cluster.", + "principalId": "[parameters('clusterAdminAadGroupObjectId')]", + "principalType": "Group" + } + }, + { + "condition": "[not(equals(parameters('clusterUserAadGroupObjectId'), parameters('clusterAdminAadGroupObjectId')))]", + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-04-01-preview", + "scope": "[format('Microsoft.ContainerService/managedClusters/{0}/namespaces/{1}', parameters('aksClusterName'), parameters('userNamespaceName'))]", + "name": "[guid(format('aad-{0}-reader-group', parameters('userNamespaceName')), resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName')), parameters('clusterUserAadGroupObjectId'))]", + "properties": { + "roleDefinitionId": "[format('/subscriptions/{0}/providers/Microsoft.Authorization/roleDefinitions/{1}', subscription().subscriptionId, variables('clusterReaderRoleId'))]", + "principalId": "[parameters('clusterUserAadGroupObjectId')]", + "description": "Members of this group are readers of the a0008 namespace in this cluster.", + "principalType": "Group" + } + }, + { + "condition": "[not(equals(parameters('clusterUserAadGroupObjectId'), parameters('clusterAdminAadGroupObjectId')))]", + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-04-01-preview", + "scope": "[format('Microsoft.ContainerService/managedClusters/{0}', parameters('aksClusterName'))]", + "name": 
"[guid(format('aad-{0}-reader-group-sc', parameters('userNamespaceName')), resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName')), parameters('clusterUserAadGroupObjectId'))]", + "properties": { + "roleDefinitionId": "[format('/subscriptions/{0}/providers/Microsoft.Authorization/roleDefinitions/{1}', subscription().subscriptionId, variables('serviceClusterUserRoleId'))]", + "principalId": "[parameters('clusterUserAadGroupObjectId')]", + "description": "Members of this group are cluster users of this cluster.", + "principalType": "Group" + } + }, + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2020-04-01-preview", + "scope": "[format('Microsoft.ManagedIdentity/userAssignedIdentities/{0}', parameters('aksIngressIdentityName'))]", + "name": "[guid('podmi-ingress-controller/Microsoft.Authorization', resourceGroup().id, parameters('aksIngressIdentityName'), variables('managedIdentityOperatorRole'))]", + "properties": { + "roleDefinitionId": "[variables('managedIdentityOperatorRole')]", + "principalId": "[reference(resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName')), '2020-11-01').identityProfile.kubeletidentity.objectId]", + "principalType": "ServicePrincipal" + } + } + ] + } + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ] + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "2020-10-01", + "name": "aksPolicies", + "properties": { + "expressionEvaluationOptions": { + "scope": "inner" + }, + "mode": "Incremental", + "parameters": { + "acrName": { + "value": "[parameters('acrName')]" + }, + "aksClusterName": { + "value": "[parameters('aksClusterName')]" + } + }, + "template": { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.4.1272.37030", + 
"templateHash": "6557873674375311407" + } + }, + "parameters": { + "aksClusterName": { + "type": "string" + }, + "acrName": { + "type": "string" + } + }, + "variables": { + "policyResourceIdAKSLinuxRestrictive": "/providers/Microsoft.Authorization/policySetDefinitions/42b8ef37-b724-4e24-bbc8-7a7708edfe00", + "policyResourceIdEnforceHttpsIngress": "/providers/Microsoft.Authorization/policyDefinitions/1a5b4dca-0b6f-4cf5-907c-56316bc1bf3d", + "policyResourceIdEnforceInternalLoadBalancers": "/providers/Microsoft.Authorization/policyDefinitions/3fc4dc25-5baf-40d8-9b05-7fe74c1bc64e", + "policyResourceIdRoRootFilesystem": "/providers/Microsoft.Authorization/policyDefinitions/df49d893-a74c-421d-bc95-c663042e5b80", + "policyResourceIdEnforceResourceLimits": "/providers/Microsoft.Authorization/policyDefinitions/e345eecc-fa47-480f-9e88-67dcc122b164", + "policyResourceIdEnforceImageSource": "/providers/Microsoft.Authorization/policyDefinitions/febd0533-8e55-448f-b837-bd0e06f16469", + "policyResourceIdEnforceDefenderInCluster": "/providers/Microsoft.Authorization/policyDefinitions/a1840de2-8088-4ea8-b153-b4c723e9cb01", + "policyAssignmentNameAKSLinuxRestrictive": "[guid(variables('policyResourceIdAKSLinuxRestrictive'), resourceGroup().name, parameters('aksClusterName'))]", + "policyAssignmentNameEnforceHttpsIngress": "[guid(variables('policyResourceIdEnforceHttpsIngress'), resourceGroup().name, parameters('aksClusterName'))]", + "policyAssignmentNameEnforceInternalLoadBalancers": "[guid(variables('policyResourceIdEnforceInternalLoadBalancers'), resourceGroup().name, parameters('aksClusterName'))]", + "policyAssignmentNameRoRootFilesystem": "[guid(variables('policyResourceIdRoRootFilesystem'), resourceGroup().name, parameters('aksClusterName'))]", + "policyAssignmentNameEnforceResourceLimits": "[guid(variables('policyResourceIdEnforceResourceLimits'), resourceGroup().name, parameters('aksClusterName'))]", + "policyAssignmentNameEnforceImageSource": 
"[guid(variables('policyResourceIdEnforceImageSource'), resourceGroup().name, parameters('aksClusterName'))]", + "policyAssignmentNameEnforceDefenderInCluster": "[guid(variables('policyResourceIdEnforceDefenderInCluster'), resourceGroup().name, parameters('aksClusterName'))]" + }, + "resources": [ + { + "type": "Microsoft.Authorization/policyAssignments", + "apiVersion": "2020-09-01", + "name": "[variables('policyAssignmentNameAKSLinuxRestrictive')]", + "properties": { + "displayName": "[format('[{0}] {1}', parameters('aksClusterName'), reference(variables('policyResourceIdAKSLinuxRestrictive'), '2020-09-01').displayName)]", + "scope": "[subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name)]", + "policyDefinitionId": "[variables('policyResourceIdAKSLinuxRestrictive')]", + "parameters": { + "excludedNamespaces": { + "value": [ + "kube-system", + "gatekeeper-system", + "azure-arc", + "cluster-baseline-settings" + ] + }, + "effect": { + "value": "audit" + } + } + } + }, + { + "type": "Microsoft.Authorization/policyAssignments", + "apiVersion": "2020-09-01", + "name": "[variables('policyAssignmentNameEnforceHttpsIngress')]", + "properties": { + "displayName": "[format('[{0}] {1}', parameters('aksClusterName'), reference(variables('policyResourceIdEnforceHttpsIngress'), '2020-09-01').displayName)]", + "scope": "[subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name)]", + "policyDefinitionId": "[variables('policyResourceIdEnforceHttpsIngress')]", + "parameters": { + "excludedNamespaces": { + "value": [] + }, + "effect": { + "value": "deny" + } + } + } + }, + { + "type": "Microsoft.Authorization/policyAssignments", + "apiVersion": "2020-09-01", + "name": "[variables('policyAssignmentNameEnforceInternalLoadBalancers')]", + "properties": { + "displayName": "[format('[{0}] {1}', parameters('aksClusterName'), reference(variables('policyResourceIdEnforceInternalLoadBalancers'), '2020-09-01').displayName)]", + "scope": 
"[subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name)]", + "policyDefinitionId": "[variables('policyResourceIdEnforceInternalLoadBalancers')]", + "parameters": { + "excludedNamespaces": { + "value": [] + }, + "effect": { + "value": "deny" + } + } + } + }, + { + "type": "Microsoft.Authorization/policyAssignments", + "apiVersion": "2020-09-01", + "name": "[variables('policyAssignmentNameRoRootFilesystem')]", + "properties": { + "displayName": "[format('[{0}] {1}', parameters('aksClusterName'), reference(variables('policyResourceIdRoRootFilesystem'), '2020-09-01').displayName)]", + "scope": "[subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name)]", + "policyDefinitionId": "[variables('policyResourceIdRoRootFilesystem')]", + "parameters": { + "excludedNamespaces": { + "value": [ + "kube-system", + "gatekeeper-system", + "azure-arc" + ] + }, + "effect": { + "value": "audit" + } + } + } + }, + { + "type": "Microsoft.Authorization/policyAssignments", + "apiVersion": "2020-09-01", + "name": "[variables('policyAssignmentNameEnforceResourceLimits')]", + "properties": { + "displayName": "[format('[{0}] {1}', parameters('aksClusterName'), reference(variables('policyResourceIdEnforceResourceLimits'), '2020-09-01').displayName)]", + "scope": "[subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name)]", + "policyDefinitionId": "[variables('policyResourceIdEnforceResourceLimits')]", + "parameters": { + "cpuLimit": { + "value": "1000m" + }, + "memoryLimit": { + "value": "512Mi" + }, + "excludedNamespaces": { + "value": [ + "kube-system", + "gatekeeper-system", + "azure-arc", + "cluster-baseline-settings", + "flux-system" + ] + }, + "effect": { + "value": "deny" + } + } + } + }, + { + "type": "Microsoft.Authorization/policyAssignments", + "apiVersion": "2020-09-01", + "name": "[variables('policyAssignmentNameEnforceImageSource')]", + "properties": { + "displayName": "[format('[{0}] {1}', 
parameters('aksClusterName'), reference(variables('policyResourceIdEnforceImageSource'), '2020-09-01').displayName)]", + "scope": "[subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name)]", + "policyDefinitionId": "[variables('policyResourceIdEnforceImageSource')]", + "parameters": { + "allowedContainerImagesRegex": { + "value": "[format('{0}.azurecr.io/.+$|mcr.microsoft.com/.+$|azurearcfork8s.azurecr.io/azurearcflux/images/stable/.+$|docker.io/weaveworks/kured.+$|docker.io/library/.+$', parameters('acrName'))]" + }, + "excludedNamespaces": { + "value": [ + "kube-system", + "gatekeeper-system", + "azure-arc" + ] + }, + "effect": { + "value": "deny" + } + } + } + }, + { + "type": "Microsoft.Authorization/policyAssignments", + "apiVersion": "2020-09-01", + "name": "[variables('policyAssignmentNameEnforceDefenderInCluster')]", + "properties": { + "displayName": "[format('[{0}] {1}', parameters('aksClusterName'), reference(variables('policyResourceIdEnforceDefenderInCluster'), '2020-09-01').displayName)]", + "description": "Microsoft Defender for Containers should be enabled in the cluster.", + "scope": "[subscriptionResourceId('Microsoft.Resources/resourceGroups', resourceGroup().name)]", + "policyDefinitionId": "[variables('policyResourceIdEnforceDefenderInCluster')]", + "parameters": { + "effect": { + "value": "Audit" + } + } + } + } + ] + } + } + } + ] + } + } + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "2020-10-01", + "name": "appGW", + "properties": { + "expressionEvaluationOptions": { + "scope": "inner" + }, + "mode": "Incremental", + "parameters": { + "aksBackendDomainName": { + "value": "[format('{0}.{1}', parameters('aksBackendSubDomainName'), parameters('aksIngressDomainName'))]" + }, + "appGWHostName": { + "value": "[parameters('appGWHostName')]" + }, + "appGWIdentityName": { + "value": "[parameters('appGWIdentityName')]" + }, + "appGWListenerCertificateSecretId": { + "value": 
"[reference(resourceId('Microsoft.Resources/deployments', 'keyVaultStamp')).outputs.appGWListenerCertificateSecretId.value]" + }, + "aksIngressCertificateSecretId": { + "value": "[reference(resourceId('Microsoft.Resources/deployments', 'keyVaultStamp')).outputs.aksIngressCertificateSecretId.value]" + }, + "appGWName": { + "value": "[parameters('appGWName')]" + }, + "appGWSubnetId": { + "value": "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, parameters('vnetGroupName')), 'Microsoft.Network/virtualNetworks/subnets', parameters('vnetName'), parameters('appGWSubnetName'))]" + }, + "location": { + "value": "[parameters('location')]" + }, + "logAnalyticsWorkspaceName": { + "value": "[parameters('logAnalyticsWorkspaceName')]" + }, + "trustedRootCertificatesRequired": { + "value": "[not(empty(parameters('aksIngressCertificateBase64')))]" + } + }, + "template": { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.4.1272.37030", + "templateHash": "1169476776455845993" + } + }, + "parameters": { + "location": { + "type": "string" + }, + "appGWName": { + "type": "string" + }, + "appGWIdentityName": { + "type": "string" + }, + "appGWListenerCertificateSecretId": { + "type": "string" + }, + "aksIngressCertificateSecretId": { + "type": "string" + }, + "appGWSubnetId": { + "type": "string" + }, + "appGWHostName": { + "type": "string" + }, + "aksBackendDomainName": { + "type": "string" + }, + "logAnalyticsWorkspaceName": { + "type": "string" + }, + "trustedRootCertificatesRequired": { + "type": "bool" + } + }, + "variables": { + "appGWPublicIpName": "[format('ip-{0}', parameters('appGWName'))]" + }, + "resources": [ + { + "type": "Microsoft.Network/publicIPAddresses", + "apiVersion": "2021-05-01", + "name": "[variables('appGWPublicIpName')]", + "location": "[parameters('location')]", + 
"zones": [ + "1", + "2", + "3" + ], + "sku": { + "name": "Standard" + }, + "properties": { + "publicIPAllocationMethod": "Static" + } + }, + { + "type": "Microsoft.Network/applicationGateways", + "apiVersion": "2021-05-01", + "name": "[parameters('appGWName')]", + "location": "[parameters('location')]", + "identity": { + "type": "UserAssigned", + "userAssignedIdentities": { + "[format('{0}', resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', parameters('appGWIdentityName')))]": {} + } + }, + "zones": [ + "1", + "2", + "3" + ], + "properties": { + "enableHttp2": false, + "sku": { + "name": "Standard_v2", + "tier": "Standard_v2" + }, + "sslPolicy": { + "policyType": "Custom", + "cipherSuites": [ + "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", + "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256" + ], + "minProtocolVersion": "TLSv1_2" + }, + "trustedRootCertificates": "[if(parameters('trustedRootCertificatesRequired'), createArray(createObject('name', 'root-cert-wildcard-aks-ingress', 'properties', createObject('keyVaultSecretId', parameters('aksIngressCertificateSecretId')))), createArray())]", + "sslCertificates": [ + { + "name": "ssl-certificate", + "properties": { + "keyVaultSecretId": "[parameters('appGWListenerCertificateSecretId')]" + } + } + ], + "frontendIPConfigurations": [ + { + "name": "[format('{0}-Frontend', parameters('appGWName'))]", + "properties": { + "publicIPAddress": { + "id": "[resourceId('Microsoft.Network/publicIPAddresses', variables('appGWPublicIpName'))]" + } + } + } + ], + "gatewayIPConfigurations": [ + { + "name": "[format('{0}-Gateway', parameters('appGWName'))]", + "properties": { + "subnet": { + "id": "[parameters('appGWSubnetId')]" + } + } + } + ], + "autoscaleConfiguration": { + "minCapacity": 0, + "maxCapacity": 2 + }, + "frontendPorts": [ + { + "name": "HTTPS", + "properties": { + "port": 443 + } + } + ], + "httpListeners": [ + { + "name": "listener-https", + "properties": { + "protocol": "Https", + "frontendIPConfiguration": { + "id": 
"[resourceId('Microsoft.Network/applicationGateways/frontendIPConfigurations', parameters('appGWName'), format('{0}-Frontend', parameters('appGWName')))]" + }, + "frontendPort": { + "id": "[resourceId('Microsoft.Network/applicationGateways/frontendPorts', parameters('appGWName'), 'HTTPS')]" + }, + "sslCertificate": { + "id": "[resourceId('Microsoft.Network/applicationGateways/sslCertificates', parameters('appGWName'), 'ssl-certificate')]" + }, + "hostName": "[parameters('appGWHostName')]", + "hostNames": [], + "requireServerNameIndication": true + } + } + ], + "requestRoutingRules": [ + { + "name": "[format('{0}-RoutingRule', parameters('appGWName'))]", + "properties": { + "ruleType": "Basic", + "httpListener": { + "id": "[resourceId('Microsoft.Network/applicationGateways/httpListeners', parameters('appGWName'), 'listener-https')]" + }, + "backendAddressPool": { + "id": "[resourceId('Microsoft.Network/applicationGateways/backendAddressPools', parameters('appGWName'), parameters('aksBackendDomainName'))]" + }, + "backendHttpSettings": { + "id": "[resourceId('Microsoft.Network/applicationGateways/backendHttpSettingsCollection', parameters('appGWName'), format('{0}-HttpSettings', parameters('appGWName')))]" + } + } + } + ], + "probes": [ + { + "name": "aks-probe", + "properties": { + "protocol": "Https", + "path": "/", + "interval": 30, + "timeout": 30, + "unhealthyThreshold": 3, + "pickHostNameFromBackendHttpSettings": true, + "minServers": 0, + "match": {} + } + } + ], + "backendHttpSettingsCollection": [ + { + "name": "[format('{0}-HttpSettings', parameters('appGWName'))]", + "properties": { + "requestTimeout": 20, + "protocol": "Https", + "port": 443, + "pickHostNameFromBackendAddress": true, + "cookieBasedAffinity": "Disabled", + "probe": { + "id": "[resourceId('Microsoft.Network/applicationGateways/probes', parameters('appGWName'), 'aks-probe')]" + }, + "trustedRootCertificates": "[if(parameters('trustedRootCertificatesRequired'), createArray(createObject('id', 
resourceId('Microsoft.Network/applicationGateways/trustedRootCertificates', parameters('appGWName'), 'root-cert-wildcard-aks-ingress'))), createArray())]" + } + } + ], + "backendAddressPools": [ + { + "name": "[parameters('aksBackendDomainName')]", + "properties": { + "backendAddresses": [ + { + "fqdn": "[parameters('aksBackendDomainName')]" + } + ] + } + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.Network/publicIPAddresses', variables('appGWPublicIpName'))]" + ] + }, + { + "type": "Microsoft.Insights/diagnosticSettings", + "apiVersion": "2021-05-01-preview", + "scope": "[format('Microsoft.Network/applicationGateways/{0}', parameters('appGWName'))]", + "name": "Microsoft.Insights", + "properties": { + "workspaceId": "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceName'))]", + "logs": [ + { + "category": "ApplicationGatewayAccessLog", + "enabled": true + }, + { + "category": "ApplicationGatewayPerformanceLog", + "enabled": true + }, + { + "category": "ApplicationGatewayFirewallLog", + "enabled": true + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.Network/applicationGateways', parameters('appGWName'))]" + ] + } + ] + } + }, + "dependsOn": [ + "[resourceId('Microsoft.Resources/deployments', 'keyVaultStamp')]" + ] + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "2020-10-01", + "name": "monitoring", + "properties": { + "expressionEvaluationOptions": { + "scope": "inner" + }, + "mode": "Incremental", + "parameters": { + "aksClusterName": { + "value": "[parameters('aksClusterName')]" + }, + "location": { + "value": "[parameters('location')]" + }, + "logAnalyticsWorkspaceName": { + "value": "[parameters('logAnalyticsWorkspaceName')]" + } + }, + "template": { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.4.1272.37030", + "templateHash": 
"7153158418628871618" + } + }, + "parameters": { + "location": { + "type": "string" + }, + "aksClusterName": { + "type": "string" + }, + "logAnalyticsWorkspaceName": { + "type": "string" + } + }, + "variables": { + "containerInsightsSolutionName": "[format('ContainerInsights({0})', parameters('logAnalyticsWorkspaceName'))]" + }, + "resources": [ + { + "type": "Microsoft.EventGrid/systemTopics", + "apiVersion": "2020-10-15-preview", + "name": "[parameters('aksClusterName')]", + "location": "[parameters('location')]", + "properties": { + "source": "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]", + "topicType": "Microsoft.ContainerService.ManagedClusters" + } + }, + { + "type": "Microsoft.Insights/diagnosticSettings", + "apiVersion": "2021-05-01-preview", + "scope": "[format('Microsoft.EventGrid/systemTopics/{0}', parameters('aksClusterName'))]", + "name": "Microsoft.Insights", + "properties": { + "workspaceId": "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceName'))]", + "logs": [ + { + "category": "DeliveryFailures", + "enabled": true + } + ], + "metrics": [ + { + "category": "AllMetrics", + "enabled": true + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.EventGrid/systemTopics', parameters('aksClusterName'))]" + ] + }, + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[format('Node CPU utilization high for {0} CI-1', parameters('aksClusterName'))]", + "location": "global", + "properties": { + "actions": [], + "criteria": { + "allOf": [ + { + "criterionType": "StaticThresholdCriterion", + "dimensions": [ + { + "name": "host", + "operator": "Include", + "values": [ + "*" + ] + } + ], + "metricName": "cpuUsagePercentage", + "metricNamespace": "Insights.Container/nodes", + "name": "Metric1", + "operator": "GreaterThan", + "threshold": 80, + "timeAggregation": "Average", + "skipMetricValidation": true + } + ], + "odata.type": 
"Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria" + }, + "description": "Node CPU utilization across the cluster.", + "enabled": true, + "evaluationFrequency": "PT1M", + "scopes": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ], + "severity": 3, + "targetResourceType": "microsoft.containerservice/managedclusters", + "windowSize": "PT5M" + } + }, + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[format('Node working set memory utilization high for {0} CI-2', parameters('aksClusterName'))]", + "location": "global", + "properties": { + "actions": [], + "criteria": { + "allOf": [ + { + "criterionType": "StaticThresholdCriterion", + "dimensions": [ + { + "name": "host", + "operator": "Include", + "values": [ + "*" + ] + } + ], + "metricName": "memoryWorkingSetPercentage", + "metricNamespace": "Insights.Container/nodes", + "name": "Metric1", + "operator": "GreaterThan", + "threshold": 80, + "timeAggregation": "Average", + "skipMetricValidation": true + } + ], + "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria" + }, + "description": "Node working set memory utilization across the cluster.", + "enabled": true, + "evaluationFrequency": "PT1M", + "scopes": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ], + "severity": 3, + "targetResourceType": "microsoft.containerservice/managedclusters", + "windowSize": "PT5M" + } + }, + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[format('Jobs completed more than 6 hours ago for {0} CI-11', parameters('aksClusterName'))]", + "location": "global", + "properties": { + "actions": [], + "criteria": { + "allOf": [ + { + "criterionType": "StaticThresholdCriterion", + "dimensions": [ + { + "name": "controllerName", + "operator": "Include", + "values": [ + "*" + ] + }, + { + "name": "kubernetes namespace", + "operator": 
"Include", + "values": [ + "*" + ] + } + ], + "metricName": "completedJobsCount", + "metricNamespace": "Insights.Container/pods", + "name": "Metric1", + "operator": "GreaterThan", + "threshold": 0, + "timeAggregation": "Average", + "skipMetricValidation": true + } + ], + "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria" + }, + "description": "This alert monitors completed jobs (more than 6 hours ago).", + "enabled": true, + "evaluationFrequency": "PT1M", + "scopes": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ], + "severity": 3, + "targetResourceType": "microsoft.containerservice/managedclusters", + "windowSize": "PT1M" + } + }, + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[format('Container CPU usage high for {0} CI-9', parameters('aksClusterName'))]", + "location": "global", + "properties": { + "actions": [], + "criteria": { + "allOf": [ + { + "criterionType": "StaticThresholdCriterion", + "dimensions": [ + { + "name": "controllerName", + "operator": "Include", + "values": [ + "*" + ] + }, + { + "name": "kubernetes namespace", + "operator": "Include", + "values": [ + "*" + ] + } + ], + "metricName": "cpuExceededPercentage", + "metricNamespace": "Insights.Container/containers", + "name": "Metric1", + "operator": "GreaterThan", + "threshold": 90, + "timeAggregation": "Average", + "skipMetricValidation": true + } + ], + "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria" + }, + "description": "This alert monitors container CPU utilization.", + "enabled": true, + "evaluationFrequency": "PT1M", + "scopes": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ], + "severity": 3, + "targetResourceType": "microsoft.containerservice/managedclusters", + "windowSize": "PT5M" + } + }, + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": 
"[format('Container working set memory usage high for {0} CI-10', parameters('aksClusterName'))]", + "location": "global", + "properties": { + "actions": [], + "criteria": { + "allOf": [ + { + "criterionType": "StaticThresholdCriterion", + "dimensions": [ + { + "name": "controllerName", + "operator": "Include", + "values": [ + "*" + ] + }, + { + "name": "kubernetes namespace", + "operator": "Include", + "values": [ + "*" + ] + } + ], + "metricName": "memoryWorkingSetExceededPercentage", + "metricNamespace": "Insights.Container/containers", + "name": "Metric1", + "operator": "GreaterThan", + "threshold": 90, + "timeAggregation": "Average", + "skipMetricValidation": true + } + ], + "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria" + }, + "description": "This alert monitors container working set memory utilization.", + "enabled": true, + "evaluationFrequency": "PT1M", + "scopes": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ], + "severity": 3, + "targetResourceType": "microsoft.containerservice/managedclusters", + "windowSize": "PT5M" + } + }, + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[format('Pods in failed state for {0} CI-4', parameters('aksClusterName'))]", + "location": "global", + "properties": { + "actions": [], + "criteria": { + "allOf": [ + { + "criterionType": "StaticThresholdCriterion", + "dimensions": [ + { + "name": "phase", + "operator": "Include", + "values": [ + "Failed" + ] + } + ], + "metricName": "podCount", + "metricNamespace": "Insights.Container/pods", + "name": "Metric1", + "operator": "GreaterThan", + "threshold": 0, + "timeAggregation": "Average", + "skipMetricValidation": true + } + ], + "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria" + }, + "description": "Pod status monitoring.", + "enabled": true, + "evaluationFrequency": "PT1M", + "scopes": [ + 
"[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ], + "severity": 3, + "targetResourceType": "microsoft.containerservice/managedclusters", + "windowSize": "PT5M" + } + }, + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[format('Disk usage high for {0} CI-5', parameters('aksClusterName'))]", + "location": "global", + "properties": { + "actions": [], + "criteria": { + "allOf": [ + { + "criterionType": "StaticThresholdCriterion", + "dimensions": [ + { + "name": "host", + "operator": "Include", + "values": [ + "*" + ] + }, + { + "name": "device", + "operator": "Include", + "values": [ + "*" + ] + } + ], + "metricName": "DiskUsedPercentage", + "metricNamespace": "Insights.Container/nodes", + "name": "Metric1", + "operator": "GreaterThan", + "threshold": 80, + "timeAggregation": "Average", + "skipMetricValidation": true + } + ], + "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria" + }, + "description": "This alert monitors disk usage for all nodes and storage devices.", + "enabled": true, + "evaluationFrequency": "PT1M", + "scopes": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ], + "severity": 3, + "targetResourceType": "microsoft.containerservice/managedclusters", + "windowSize": "PT5M" + } + }, + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[format('Nodes in not ready status for {0} CI-3', parameters('aksClusterName'))]", + "location": "global", + "properties": { + "actions": [], + "criteria": { + "allOf": [ + { + "criterionType": "StaticThresholdCriterion", + "dimensions": [ + { + "name": "status", + "operator": "Include", + "values": [ + "NotReady" + ] + } + ], + "metricName": "nodesCount", + "metricNamespace": "Insights.Container/nodes", + "name": "Metric1", + "operator": "GreaterThan", + "threshold": 0, + "timeAggregation": "Average", + "skipMetricValidation": 
true + } + ], + "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria" + }, + "description": "Node status monitoring.", + "enabled": true, + "evaluationFrequency": "PT1M", + "scopes": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ], + "severity": 3, + "targetResourceType": "microsoft.containerservice/managedclusters", + "windowSize": "PT5M" + } + }, + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[format('Containers getting OOM killed for {0} CI-6', parameters('aksClusterName'))]", + "location": "global", + "properties": { + "actions": [], + "criteria": { + "allOf": [ + { + "criterionType": "StaticThresholdCriterion", + "dimensions": [ + { + "name": "kubernetes namespace", + "operator": "Include", + "values": [ + "*" + ] + }, + { + "name": "controllerName", + "operator": "Include", + "values": [ + "*" + ] + } + ], + "metricName": "oomKilledContainerCount", + "metricNamespace": "Insights.Container/pods", + "name": "Metric1", + "operator": "GreaterThan", + "threshold": 0, + "timeAggregation": "Average", + "skipMetricValidation": true + } + ], + "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria" + }, + "description": "This alert monitors number of containers killed due to out of memory (OOM) error.", + "enabled": true, + "evaluationFrequency": "PT1M", + "scopes": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ], + "severity": 3, + "targetResourceType": "microsoft.containerservice/managedclusters", + "windowSize": "PT1M" + } + }, + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[format('Persistent volume usage high for {0} CI-18', parameters('aksClusterName'))]", + "location": "global", + "properties": { + "actions": [], + "criteria": { + "allOf": [ + { + "criterionType": "StaticThresholdCriterion", + "dimensions": [ + { + "name": "podName", 
+ "operator": "Include", + "values": [ + "*" + ] + }, + { + "name": "kubernetesNamespace", + "operator": "Include", + "values": [ + "*" + ] + } + ], + "metricName": "pvUsageExceededPercentage", + "metricNamespace": "Insights.Container/persistentvolumes", + "name": "Metric1", + "operator": "GreaterThan", + "threshold": 80, + "timeAggregation": "Average", + "skipMetricValidation": true + } + ], + "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria" + }, + "description": "This alert monitors persistent volume utilization.", + "enabled": false, + "evaluationFrequency": "PT1M", + "scopes": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ], + "severity": 3, + "targetResourceType": "microsoft.containerservice/managedclusters", + "windowSize": "PT5M" + } + }, + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[format('Pods not in ready state for {0} CI-8', parameters('aksClusterName'))]", + "location": "global", + "properties": { + "actions": [], + "criteria": { + "allOf": [ + { + "criterionType": "StaticThresholdCriterion", + "dimensions": [ + { + "name": "controllerName", + "operator": "Include", + "values": [ + "*" + ] + }, + { + "name": "kubernetes namespace", + "operator": "Include", + "values": [ + "*" + ] + } + ], + "metricName": "PodReadyPercentage", + "metricNamespace": "Insights.Container/pods", + "name": "Metric1", + "operator": "LessThan", + "threshold": 80, + "timeAggregation": "Average", + "skipMetricValidation": true + } + ], + "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria" + }, + "description": "This alert monitors for excessive pods not in the ready state.", + "enabled": true, + "evaluationFrequency": "PT1M", + "scopes": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ], + "severity": 3, + "targetResourceType": "microsoft.containerservice/managedclusters", + 
"windowSize": "PT5M" + } + }, + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[format('Restarting container count for {0} CI-7', parameters('aksClusterName'))]", + "location": "global", + "properties": { + "actions": [], + "criteria": { + "allOf": [ + { + "criterionType": "StaticThresholdCriterion", + "dimensions": [ + { + "name": "kubernetes namespace", + "operator": "Include", + "values": [ + "*" + ] + }, + { + "name": "controllerName", + "operator": "Include", + "values": [ + "*" + ] + } + ], + "metricName": "restartingContainerCount", + "metricNamespace": "Insights.Container/pods", + "name": "Metric1", + "operator": "GreaterThan", + "threshold": 0, + "timeAggregation": "Average", + "skipMetricValidation": true + } + ], + "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria" + }, + "description": "This alert monitors number of containers restarting across the cluster.", + "enabled": true, + "evaluationFrequency": "PT1M", + "scopes": [ + "[resourceId('Microsoft.ContainerService/managedClusters', parameters('aksClusterName'))]" + ], + "severity": 3, + "targetResourceType": "Microsoft.ContainerService/managedClusters", + "windowSize": "PT1M" + } + }, + { + "type": "microsoft.insights/activityLogAlerts", + "apiVersion": "2017-04-01", + "name": "AllAzureAdvisorAlert", + "location": "Global", + "properties": { + "scopes": [ + "[resourceGroup().id]" + ], + "condition": { + "allOf": [ + { + "field": "category", + "equals": "Recommendation" + }, + { + "field": "operationName", + "equals": "Microsoft.Advisor/recommendations/available/action" + } + ] + }, + "actions": { + "actionGroups": [] + }, + "enabled": true, + "description": "All azure advisor alerts" + } + }, + { + "type": "Microsoft.OperationalInsights/workspaces/savedSearches", + "apiVersion": "2020-08-01", + "name": "[format('{0}/AllPrometheus', parameters('logAnalyticsWorkspaceName'))]", + "properties": { + "eTag": "*", + "category": "Prometheus", 
+ "displayName": "All collected Prometheus information", + "query": "InsightsMetrics | where Namespace == \"prometheus\"", + "version": 1 + } + }, + { + "type": "Microsoft.OperationalInsights/workspaces/savedSearches", + "apiVersion": "2020-08-01", + "name": "[format('{0}/NodeRebootRequested', parameters('logAnalyticsWorkspaceName'))]", + "properties": { + "eTag": "*", + "category": "Prometheus", + "displayName": "Nodes reboot required by kured", + "query": "InsightsMetrics | where Namespace == \"prometheus\" and Name == \"kured_reboot_required\" | where Val > 0", + "version": 1 + } + } + ] + } + }, + "dependsOn": [ + "[resourceId('Microsoft.Resources/deployments', 'aks')]", + "[resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceName'))]" + ] + } + ] +} \ No newline at end of file