From 5e8766b078331a20313cd9f360263f075468e1a9 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:22:03 -0400 Subject: [PATCH 01/34] hostpool alerts update - fix multiline Changed the multi-line query values from single-quoted scalars (query: '<KQL>') to folded block scalars (query: >- followed by the KQL, one clause per line). --- .../hostPools/alerts.yaml | 753 ++---------------- 1 file changed, 68 insertions(+), 685 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index b5584f014..5551dbdc0 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -1,197 +1,7 @@ -- name: Capacity 85 Percent (xHostPoolNamex) - description: - This alert is based on the Action Account and Runbook that populates the Log Analytics specificed with the AVD Metrics Deployment Solution for xHostPoolNamex. - -->Last Number in the string is the Percentage Remaining for the Host Pool. - Output is - - HostPoolName|ResourceGroup|Type|MaxSessionLimit|NumberHosts|TotalUsers|DisconnectedUser|ActiveUsers|SessionsAvailable|HostPoolPercentageLoad' +- name: No Resources Available + description: Catastrophic Event! Indicates potential problems with dependencies, diagnose and resolve. 
type: Log - verified: false - visible: true - tags: - - avd - properties: - severity: 2 - operator: GreaterThanOrEqual - timeAggregation: Count - windowSize: PT30M - evaluationFrequency: PT5M - threshold: 1 - resouceIdColumn: ResourceId - dimensions: - - name: HostPoolName - operator: Include - values: - - "*" - - name: UserSessionsTotal - operator: Include - values: - - "*" - - name: UserSessionsDisconnected - operator: Include - values: - - "*" - - name: UserSessionsActive - operator: Include - values: - - "*" - - name: UserSessionsAvailable - operator: Include - values: - - "*" - - name: HostPoolPercentLoad - operator: Include - values: - - "*" - failingPeriods: - numberOfEvaluationPeriods: 1 - minFailingPeriodsToAlert: 1 - query: 'AzureDiagnostics - - | where Category has "JobStreams" and StreamType_s == "Output" and RunbookName_s == "AvdHostPoolLogData" - - | sort by TimeGenerated - - | where TimeGenerated > now() - 5m - - | extend HostPoolName=tostring(split(ResultDescription, ''|'')[0]) - - | extend ResourceGroup=tostring(split(ResultDescription, ''|'')[1]) - - | extend Type=tostring(split(ResultDescription, ''|'')[2]) - - | extend MaxSessionLimit=toint(split(ResultDescription, ''|'')[3]) - - | extend NumberSessionHosts=toint(split(ResultDescription, ''|'')[4]) - - | extend UserSessionsTotal=toint(split(ResultDescription, ''|'')[5]) - - | extend UserSessionsDisconnected=toint(split(ResultDescription, ''|'')[6]) - - | extend UserSessionsActive=toint(split(ResultDescription, ''|'')[7]) - - | extend UserSessionsAvailable=toint(split(ResultDescription, ''|'')[8]) - - | extend HostPoolPercentLoad=toint(split(ResultDescription, ''|'')[9]) - - | extend HPResourceId=tostring(split(ResultDescription, ''|'')[13]) - - | extend ResourceId=tostring(HPResourceId) - - | where HostPoolPercentLoad >= 85 and HostPoolPercentLoad < 95 - - | where HostPoolName =~ ''xHostPoolNamex''' - autoMitigate: true - autoResolve: true - autoResolveTime: "0:30:00" - references: - 
deployments: - - name: AVD-HostPool - template: Deploy-AVD-HostPool-Alert.json - type: Policy - tags: - - alz - properties: - scope: Subscription - multiResource: false -- name: Capacity 95 Percent (xHostPoolNamex) - description: - This alert is based on the Action Account and Runbook that populates the Log Analytics specificed with the AVD Metrics Deployment Solution for xHostPoolNamex. - -->Last Number in the string is the Percentage Remaining for the Host Pool. - Output is - - HostPoolName|ResourceGroup|Type|MaxSessionLimit|NumberHosts|TotalUsers|DisconnectedUser|ActiveUsers|SessionsAvailable|HostPoolPercentageLoad' - type: Log - verified: false - visible: true - tags: - - avd - properties: - severity: 1 - operator: GreaterThanOrEqual - timeAggregation: Count - windowSize: PT30M - evaluationFrequency: PT5M - threshold: 1 - resouceIdColumn: ResourceId - dimensions: - - name: HostPoolName - operator: Include - values: - - "*" - - name: UserSessionsTotal - operator: Include - values: - - "*" - - name: UserSessionsDisconnected - operator: Include - values: - - "*" - - name: UserSessionsActive - operator: Include - values: - - "*" - - name: UserSessionsAvailable - operator: Include - values: - - "*" - - name: HostPoolPercentLoad - operator: Include - values: - - "*" - failingPeriods: - numberOfEvaluationPeriods: 1 - minFailingPeriodsToAlert: 1 - query: 'AzureDiagnostics - - | where Category has "JobStreams" and StreamType_s == "Output" and RunbookName_s == "AvdHostPoolLogData" - - | sort by TimeGenerated - - | where TimeGenerated > now() - 5m - - | extend HostPoolName=tostring(split(ResultDescription, ''|'')[0]) - - | extend ResourceGroup=tostring(split(ResultDescription, ''|'')[1]) - - | extend Type=tostring(split(ResultDescription, ''|'')[2]) - - | extend MaxSessionLimit=toint(split(ResultDescription, ''|'')[3]) - - | extend NumberSessionHosts=toint(split(ResultDescription, ''|'')[4]) - - | extend UserSessionsTotal=toint(split(ResultDescription, ''|'')[5]) - - | 
extend UserSessionsDisconnected=toint(split(ResultDescription, ''|'')[6]) - - | extend UserSessionsActive=toint(split(ResultDescription, ''|'')[7]) - - | extend UserSessionsAvailable=toint(split(ResultDescription, ''|'')[8]) - - | extend HostPoolPercentLoad=toint(split(ResultDescription, ''|'')[9]) - - | extend HPResourceId=tostring(split(ResultDescription, ''|'')[13]) - - | extend ResourceId=tostring(HPResourceId) - - | where HostPoolPercentLoad >= 95 - - | where HostPoolName =~ ''xHostPoolNamex''' - autoMitigate: true - autoResolve: true - autoResolveTime: "0:30:00" - references: - deployments: - - name: AVD-HostPool - template: Deploy-AVD-HostPool-Alert.json - type: Policy - tags: - - alz - properties: - scope: Subscription - multiResource: false -- name: No Resources Available (xHostPoolNamex) - description: Catastrophic Event! Indicates potential problems with dependencies, diagnose and resolve for xHostPoolNamex. - type: Log - verified: false + verified: true visible: true tags: - avd @@ -215,38 +25,22 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'WVDConnections - + query: >- + WVDConnections | where TimeGenerated > ago (15m) - - | where _ResourceId contains "xHostPoolNamex" - | project-away TenantId,SourceSystem - | summarize arg_max(TimeGenerated, *), StartTime = min(iff(State== \''Started\'', TimeGenerated , datetime(null) )), ConnectTime = min(iff(State== \''Connected\'', TimeGenerated , datetime(null) )) by CorrelationId - | join kind=leftouter (WVDErrors - |summarize Errors=makelist(pack(\''Code\'', Code, \''CodeSymbolic\'', CodeSymbolic, \''Time\'', TimeGenerated, \''Message\'', Message ,\''ServiceError\'', ServiceError, \''Source\'', Source)) by CorrelationId - ) on CorrelationId - | join kind=leftouter (WVDCheckpoints - | summarize Checkpoints=makelist(pack(\''Time\'', TimeGenerated, \''Name\'', Name, \''Parameters\'', Parameters, \''Source\'', Source)) by CorrelationId - | mv-apply Checkpoints on ( - order by 
todatetime(Checkpoints[\''Time\'']) asc - | summarize Checkpoints=makelist(Checkpoints)) - ) on CorrelationId - | project-away CorrelationId1, CorrelationId2 - | order by TimeGenerated desc - | where Errors[0].CodeSymbolic == "ConnectionFailedNoHealthyRdshAvailable"' autoMitigate: true autoResolve: true @@ -261,10 +55,10 @@ properties: scope: Subscription multiResource: false -- name: User Disconnected over 24h (xHostPoolNamex) - description: Verify Remote Desktop Policies are applied relating to Session Limits for xHostPoolNamex. This could impact your scaling plan as well. +- name: User Disconnected over 24h + description: Verify Remote Desktop Policies are applied relating to Session Limits. This could impact your scaling plan as well. type: Log - verified: false + verified: true visible: true tags: - avd @@ -288,29 +82,18 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'WVDConnections - + query: >- + WVDConnections | where TimeGenerated > ago(24h) - | where State == "Connected" - - | where _ResourceId contains "xHostPoolNamex" - | project CorrelationId , UserName, ConnectionType, StartTime=TimeGenerated, SessionHostName - | join (WVDConnections - | where State == "Completed" - | project EndTime=TimeGenerated, CorrelationId) - on CorrelationId - | project Duration = EndTime - StartTime, ConnectionType, UserName, SessionHostName - | where Duration >= timespan(24:00:00) - - | sort by Duration desc' + | sort by Duration desc autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" @@ -324,10 +107,10 @@ properties: scope: Subscription multiResource: false -- name: User Disconnected over 72h (xHostPoolNamex) - description: Verify Remote Desktop Policies are applied relating to Session Limits for xHostPoolNamex. This could impact your scaling plan as well. +- name: User Disconnected over 72h + description: Verify Remote Desktop Policies are applied relating to Session Limits. This could impact your scaling plan as well. 
type: Log - verified: false + verified: true visible: true tags: - avd @@ -355,7 +138,6 @@ WVDConnections | where TimeGenerated > ago(24h) | where State == "Connected" - | where _ResourceId contains "xHostPoolNamex" | project CorrelationId , UserName, ConnectionType, StartTime=TimeGenerated, SessionHostName | join (WVDConnections | where State == "Completed" @@ -377,180 +159,10 @@ properties: scope: Subscription multiResource: false -- name: Local Disk Space less than 10% (xHostPoolNamex) - description: Disk space Moderately Low. \nConsider review of the VM local C drive and determine what is consuming disk space for the VM in xHostPoolNamex. This could be local profiles or temp files that need to be cleaned up or removed. - type: Log - verified: false - visible: true - tags: - - avd - properties: - severity: 2 - operator: GreaterThanOrEqual - timeAggregation: Count - windowSize: PT15M - evaluationFrequency: PT15M - threshold: 1 - resouceIdColumn: _ResourceId - dimensions: - - name: ComputerName - operator: Include - values: - - "*" - - name: VMresourceGroup - operator: Include - values: - - "*" - - name: HostPool - operator: Include - values: - - "*" - failingPeriods: - numberOfEvaluationPeriods: 1 - minFailingPeriodsToAlert: 1 - query: 'Perf - - | where TimeGenerated > ago(15m) - - | where ObjectName == "LogicalDisk" and CounterName == "% Free Space" - - | where InstanceName !contains "D:" - - | where InstanceName !contains "_Total" | where CounterValue <= 10.00 - - | parse _ResourceId with "/subscriptions/" subscription "/resourcegroups/" ResourceGroup "/providers/microsoft.compute/virtualmachines/" ComputerName - - | summarize arg_max(TimeGenerated, *) by ComputerName - - | extend ComputerName=tolower(ComputerName) - - | project ComputerName, CounterValue, subscription, ResourceGroup, TimeGenerated - - | join kind = leftouter - - (WVDAgentHealthStatus - - | where TimeGenerated > ago(15m) - - | where _ResourceId contains "xHostPoolNamex" - - | parse _ResourceId 
with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - - | extend ComputerName=tolower(ComputerName) - - | summarize arg_max(TimeGenerated,*) by ComputerName - - | project VMresourceGroup, ComputerName, HostPool, _ResourceId - - ) on ComputerName - - | where ComputerName1 contains ComputerName' - autoMitigate: true - autoResolve: true - autoResolveTime: "0:30:00" - references: - deployments: - - name: AVD-HostPool - template: Deploy-AVD-HostPool-Alert.json - type: Policy - tags: - - alz - properties: - scope: Subscription - multiResource: false -- name: Local Disk Space less than 5% (xHostPoolNamex) - description: Disk space Moderately Low. \nConsider review of the VM local C drive and determine what is consuming disk space for the VM in xHostPoolNamex. This could be local profiles or temp files that need to be cleaned up or removed. +- name: FSLogix Profile less than 5% + description: User Profiles Service logged Event ID 33. Expand User's Virtual Profile Disk and/or clean up user profile data on the VM. 
type: Log - verified: false - visible: true - tags: - - avd - properties: - severity: 1 - operator: GreaterThanOrEqual - timeAggregation: Count - windowSize: PT15M - evaluationFrequency: PT15M - threshold: 1 - resouceIdColumn: _ResourceId - dimensions: - - name: ComputerName - operator: Include - values: - - "*" - - name: VMresourceGroup - operator: Include - values: - - "*" - - name: HostPool - operator: Include - values: - - "*" - failingPeriods: - numberOfEvaluationPeriods: 1 - minFailingPeriodsToAlert: 1 - query: 'Perf - - | where TimeGenerated > ago(15m) - - | where ObjectName == "LogicalDisk" and CounterName == "% Free Space" - - | where InstanceName !contains "D:" - - | where InstanceName !contains "_Total" - - | where CounterValue <= 5.00 - - | parse _ResourceId with "/subscriptions/" subscription "/resourcegroups/" ResourceGroup "/providers/microsoft.compute/virtualmachines/" ComputerName - - | summarize arg_max(TimeGenerated, *) by ComputerName - - | extend ComputerName=tolower(ComputerName) - - | project ComputerName, CounterValue, subscription, ResourceGroup, TimeGenerated - - | join kind = leftouter - - ( - - WVDAgentHealthStatus - - | where TimeGenerated > ago(15m) - - | where _ResourceId contains "xHostPoolNamex" - - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - - | extend ComputerName=tolower(ComputerName) - - | summarize arg_max(TimeGenerated,*) by ComputerName - - | project VMresourceGroup, ComputerName, HostPool, _ResourceId - - ) on ComputerName - - | where ComputerName1 contains ComputerName' - autoMitigate: true - autoResolve: true - autoResolveTime: "0:30:00" - references: - deployments: - - name: AVD-HostPool - template: 
Deploy-AVD-HostPool-Alert.json - type: Policy - tags: - - alz - properties: - scope: Subscription - multiResource: false -- name: FSLogix Profile less than 5% (xHostPoolNamex) - description: User Profiles Service logged Event ID 33. Expand User's Virtual Profile Disk and/or clean up user profile data on the VM in xHostPoolNamex. - type: Log - verified: false + verified: true visible: true tags: - avd @@ -581,37 +193,22 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'Event - + query: >- + Event | where EventLog == "Microsoft-FSLogix-Apps/Admin" - | where EventLevelName == "Warning" - | where EventID == 34 - | parse _ResourceId with "/subscriptions/" subscription "/resourcegroups/" ResourceGroup "/providers/microsoft.compute/virtualmachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated - | join kind = leftouter - (WVDAgentHealthStatus - - | where _ResourceId contains "xHostPoolNamex" - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool - - ) on ComputerName' + ) on ComputerName autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" @@ -625,10 +222,10 @@ properties: scope: Subscription multiResource: false -- name: FSLogix Profile less than 2% (xHostPoolNamex) - description: User Profiles Service logged Event ID 34. Expand User's Virtual Profile Disk and/or clean up user profile data on the VM in xHostPoolNamex. 
+- name: FSLogix Profile less than 2% + description: User Profiles Service logged Event ID 34. Expand User's Virtual Profile Disk and/or clean up user profile data on the VM. type: Log - verified: false + verified: true visible: true tags: - avd @@ -659,37 +256,22 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'Event - + query: >- + Event | where EventLog == "Microsoft-FSLogix-Apps/Admin" - | where EventLevelName == "Error" - | where EventID == 33 - | parse _ResourceId with "/subscriptions/" subscription "/resourcegroups/" ResourceGroup "/providers/microsoft.compute/virtualmachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated - | join kind = leftouter - (WVDAgentHealthStatus - - | where _ResourceId contains "xHostPoolNamex" - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool - - ) on ComputerName' + ) on ComputerName autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" @@ -703,10 +285,10 @@ properties: scope: Subscription multiResource: false -- name: FSLogix Network Issue (xHostPoolNamex) - description: User Profiles Service logged Event ID 43. Verify network communications between the storage and AVD VM related to xHostPoolNamex. +- name: FSLogix Network Issue + description: User Profiles Service logged Event ID 43. Verify network communications between the storage and AVD VM. 
type: Log - verified: false + verified: true visible: true tags: - avd @@ -737,37 +319,22 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'Event - + query: >- + Event | where EventLog == "Microsoft-FSLogix-Apps/Admin" - | where EventLevelName == "Error" - | where EventID == 43 - | parse _ResourceId with "/subscriptions/" subscription "/resourcegroups/" ResourceGroup "/providers/microsoft.compute/virtualmachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated - | join kind = leftouter - (WVDAgentHealthStatus - - | where _ResourceId contains "xHostPoolNamex" - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool - - ) on ComputerName' + ) on ComputerName autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" @@ -781,10 +348,10 @@ properties: scope: Subscription multiResource: false -- name: FSLogix Profile Disk Failed to Attach (xHostPoolNamex) - description: User Profiles Service logged an Event ID 52 or 40. Investigate error details for reason regarding xHostPoolNamex. +- name: FSLogix Profile Disk Failed to Attach + description: User Profiles Service logged an Event ID 52 or 40. Investigate error details for reason. 
type: Log - verified: false + verified: true visible: true tags: - avd @@ -816,37 +383,22 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'Event - + query: >- + Event | where EventLog == "Microsoft-FSLogix-Apps/Admin" - | where EventLevelName == "Error" - | where EventID == 42 or EventID == 40 - | parse _ResourceId with "/subscriptions/" subscription "/resourcegroups/" ResourceGroup "/providers/microsoft.compute/virtualmachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated - | join kind = leftouter - (WVDAgentHealthStatus - - | where _ResourceId contains "xHostPoolNamex" - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool - - ) on ComputerName' + ) on ComputerName autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" @@ -860,10 +412,10 @@ properties: scope: Subscription multiResource: false -- name: FSLogix Service Disabled (xHostPoolNamex) - description: User Profile Service Disabled. Determine why service was disabled and re-enable / start the FSLogix service. Regarding xHostPoolNamex. +- name: FSLogix Service Disabled + description: User Profile Service Disabled. Determine why service was disabled and re-enable / start the FSLogix service. 
type: Log - verified: false + verified: true visible: true tags: - avd @@ -895,37 +447,22 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'Event - + query: >- + Event | where EventLog == "Microsoft-FSLogix-Apps/Admin" - | where EventLevelName == "Warning" - | where EventID == 60 - | parse _ResourceId with "/subscriptions/" subscription "/resourcegroups/" ResourceGroup "/providers/microsoft.compute/virtualmachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated - | join kind = leftouter - (WVDAgentHealthStatus - - | where _ResourceId contains "xHostPoolNamex" - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool - - ) on ComputerName' + ) on ComputerName autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" @@ -939,10 +476,10 @@ properties: scope: Subscription multiResource: false -- name: FSLogix Disk Compact Failure (xHostPoolNamex) - description: User Profile Service logged Event ID 62 or 63. The profile Disk was marked for compaction due to additional white space but failed. See error details for additional information regarding xHostPoolNamex. +- name: FSLogix Disk Compact Failure + description: User Profile Service logged Event ID 62 or 63. The profile Disk was marked for compaction due to additional white space but failed. See error details for additional information. 
type: Log - verified: false + verified: true visible: true tags: - avd @@ -974,37 +511,22 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'Event - + query: >- + Event | where EventLog == "Microsoft-FSLogix-Apps/Admin" - | where EventLevelName == "Error" - | where EventID == 62 or EventID == 63 - | parse _ResourceId with "/subscriptions/" subscription "/resourcegroups/" ResourceGroup "/providers/microsoft.compute/virtualmachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated - | join kind = leftouter - (WVDAgentHealthStatus - - | where _ResourceId contains "xHostPoolNamex" - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool - - ) on ComputerName' + ) on ComputerName autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" @@ -1018,10 +540,10 @@ properties: scope: Subscription multiResource: false -- name: FSLogix Disk Already In Use (xHostPoolNamex) - description: User Profile Service logged an Event ID 51. This indicates that a user attempted to load their profile disk but it was in use or possibly mapped to another VM. Ensure the user is not connected to another host pool or remote app with the same profile. Regarding xHostPoolNamex. +- name: FSLogix Disk Already In Use + description: User Profile Service logged an Event ID 51. This indicates that a user attempted to load their profile disk but it was in use or possibly mapped to another VM. 
Ensure the user is not connected to another host pool or remote app with the same profile. type: Log - verified: false + verified: true visible: true tags: - avd @@ -1053,37 +575,22 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'Event - + query: >- + Event | where EventLog == "Microsoft-FSLogix-Apps/Operational" - | where EventLevelName == "Warning" - | where EventID == 51 - | parse _ResourceId with "/subscriptions/" subscription "/resourcegroups/" ResourceGroup "/providers/microsoft.compute/virtualmachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated - | join kind = leftouter - (WVDAgentHealthStatus - - | where _ResourceId contains "xHostPoolNamex" - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool - - ) on ComputerName' + ) on ComputerName autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" @@ -1097,10 +604,10 @@ properties: scope: Subscription multiResource: false -- name: Session Host Healthcheck Failure (xHostPoolNamex) - description: VM is available for use but one of the dependent resources is in a failed state for hostpool xHostPoolNamex. +- name: Session Host Healthcheck Failure + description: VM is available for use but one of the dependent resources is in a failed state. 
type: Log - verified: false + verified: true visible: true tags: - avd @@ -1132,59 +639,33 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'let MapToDesc = (idx: long) { - + query: >- + let MapToDesc = (idx: long) { case(idx == 0, "DomainJoin", - idx == 1, "DomainTrust", - idx == 2, "FSLogix", - idx == 3, "SxSStack", - idx == 4, "URLCheck", - idx == 5, "GenevaAgent", - idx == 6, "DomainReachable", - idx == 7, "WebRTCRedirector", - idx == 8, "SxSStackEncryption", - idx == 9, "IMDSReachable", - idx == 10, "MSIXPackageStaging", - "InvalidIndex")}; - WVDAgentHealthStatus - | where TimeGenerated > ago(10m) - | where Status != \''Available\'' - | where AllowNewSessions = True - | extend CheckFailed = parse_json(SessionHostHealthCheckResult) - | mv-expand CheckFailed - | where CheckFailed.AdditionalFailureDetails.ErrorCode != 0 - | extend HealthCheckName = tolong(CheckFailed.HealthCheckName) - | extend HealthCheckResult = tolong(CheckFailed.HealthCheckResult) - | extend HealthCheckDesc = MapToDesc(HealthCheckName) - | where HealthCheckDesc != \''InvalidIndex\'' - - | where _ResourceId contains "xHostPoolNamex" - | parse _ResourceId with "/subscriptions/" subscription "/resourcegroups/" HostPoolResourceGroup "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - - | parse SessionHostResourceId with "/subscriptions/" HostSubscription "/resourceGroups/" SessionHostRG " /providers/Microsoft.Compute/virtualMachines/" SessionHostName' + | parse SessionHostResourceId with "/subscriptions/" HostSubscription "/resourceGroups/" SessionHostRG " /providers/Microsoft.Compute/virtualMachines/" SessionHostName autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" @@ -1198,87 +679,10 @@ properties: scope: Subscription multiResource: false -- name: Personal Desktop Assigned Healthcheck Failure (xHostPoolNamex) - description: VM is assigned to a user but one of the dependent resources is in a failed state for hostpool 
xHostPoolNamex. This alert relies on the runbook AvdHostPoolLogData. +- name: User Connection to Session Host Failure + description: A user failed to connect to a VM. There are lots of variables between the end users and AVD VMs. If this is frequent for the user, determine if their Internet connection is slow or latency is over 150 ms. type: Log - verified: false - visible: true - tags: - - avd - properties: - severity: 1 - operator: GreaterThanOrEqual - timeAggregation: Count - windowSize: PT5M - evaluationFrequency: PT5M - resourceIdColumn: _ResourceId - threshold: 1 - dimensions: - - name: SessionHostName - operator: Include - values: - - "*" - - name: HealthCheckDesc - operator: Include - values: - - "*" - - name: HostPool - operator: Include - values: - - "*" - - name: SessionHostRG - operator: Include - values: - - "*" - failingPeriods: - numberOfEvaluationPeriods: 1 - minFailingPeriodsToAlert: 1 - query: 'AzureDiagnostics - - | where Category has "JobStreams" and StreamType_s == "Output" and RunbookName_s == "AvdHostPoolLogData" - - | sort by TimeGenerated - - | where TimeGenerated > ago(15m) - - | extend HostPoolName=tostring(split(ResultDescription, ''|'')[0]) - - | extend ResourceGroup=tostring(split(ResultDescription, ''|'')[1]) - - | extend Type=tostring(split(ResultDescription, ''|'')[2]) - - | extend NumberSessionHosts=toint(split(ResultDescription, ''|'')[4]) - - | extend UserSessionsActive=toint(split(ResultDescription, ''|'')[7]) - - | extend NumPersonalUnhealthy=toint(split(ResultDescription, ''|'')[10]) - - | extend PersonalSessionHost=extract_json("$.SessionHost", tostring(split(ResultDescription, ''|'')[11]), typeof(string)) - - | extend PersonalAssignedUser=extract_json("$.AssignedUser", tostring(split(ResultDescription, ''|'')[11]), typeof(string)) - - | where HostPoolName =~ ''xHostPoolNamex'' - - | where Type == ''Personal'' - - | where NumPersonalUnhealthy > 0 ' - autoMitigate: true - autoResolve: true - autoResolveTime: "0:30:00" - 
references: - deployments: - - name: AVD-HostPool - template: Deploy-AVD-HostPool-Alert.json - type: Policy - tags: - - alz - properties: - scope: Subscription - multiResource: false -- name: User Connection to Session Host Failure (xHostPoolNamex) - description: While trying to connect to xHostPoolNamex a user had an error and failed to connect to a VM. There are lots of variables between the end uers and AVD VMs. If this is frequent for the user, determine if their Internet connection is slow or latency is over 150 ms. Regarding xHostPoolNamex. - type: Log - verified: false + verified: true visible: true tags: - avd @@ -1330,49 +734,28 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'WVDConnections - + query: >- + WVDConnections // | where UserName == "upn.here@contoso.com" - | project-away TenantId,SourceSystem - | summarize arg_max(TimeGenerated, *), StartTime = min(iff(State==''Started'', TimeGenerated , datetime(null) )), ConnectTime = min(iff(State==''Connected'', TimeGenerated , datetime(null) )) by CorrelationId - | join kind=leftouter (WVDErrors - |summarize Errors=make_list(pack(''Code'', Code, ''CodeSymbolic'', CodeSymbolic, ''Time'', TimeGenerated, ''Message'', Message ,''ServiceError'', ServiceError, ''Source'', Source)) by CorrelationId - ) on CorrelationId - | join kind=leftouter (WVDCheckpoints - | summarize Checkpoints=make_list(pack(''Time'', TimeGenerated, ''Name'', Name, ''Parameters'', Parameters, ''Source'', Source)) by CorrelationId - | mv-apply Checkpoints on ( - order by todatetime(Checkpoints[''Time'']) asc - | summarize Checkpoints=make_list(Checkpoints)) - ) on CorrelationId - | project-away CorrelationId1, CorrelationId2 - | order by TimeGenerated desc - | where TimeGenerated > ago(15m) - | extend ResourceGroup=tostring(split(_ResourceId, ''/'')[4]) - | extend HostPool=tostring(split(_ResourceId, ''/'')[8]) - - | where HostPool =~ ''xHostPoolNamex'' - | extend 
ErrorShort=tostring(Errors[0].CodeSymbolic) - | extend ErrorMessage=tostring(Errors[0].Message) - - | project TimeGenerated, HostPool, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage' + | project TimeGenerated, HostPool, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" From 6e5391bd78dae82958e29b3addcf63556546afb2 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:34:32 -0400 Subject: [PATCH 02/34] host pool alerts update yaml syntax --- services/DesktopVirtualization/hostPools/alerts.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 5551dbdc0..8a6cc530e 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -29,19 +29,19 @@ WVDConnections | where TimeGenerated > ago (15m) | project-away TenantId,SourceSystem - | summarize arg_max(TimeGenerated, *), StartTime = min(iff(State== \''Started\'', TimeGenerated , datetime(null) )), ConnectTime = min(iff(State== \''Connected\'', TimeGenerated , datetime(null) )) by CorrelationId + | summarize arg_max(TimeGenerated, *), StartTime = min(iff(State== 'Started', TimeGenerated , datetime(null) )), ConnectTime = min(iff(State== 'Connected', TimeGenerated , datetime(null) )) by CorrelationId | join kind=leftouter (WVDErrors - |summarize Errors=makelist(pack(\''Code\'', Code, \''CodeSymbolic\'', CodeSymbolic, \''Time\'', TimeGenerated, \''Message\'', Message ,\''ServiceError\'', ServiceError, \''Source\'', Source)) by CorrelationId + |summarize Errors=makelist(pack('Code', Code, 'CodeSymbolic', CodeSymbolic, 'Time', TimeGenerated, 'Message', Message ,'ServiceError', 
ServiceError, 'Source', Source)) by CorrelationId ) on CorrelationId | join kind=leftouter (WVDCheckpoints - | summarize Checkpoints=makelist(pack(\''Time\'', TimeGenerated, \''Name\'', Name, \''Parameters\'', Parameters, \''Source\'', Source)) by CorrelationId + | summarize Checkpoints=makelist(pack('Time', TimeGenerated, 'Name', Name, 'Parameters', Parameters, 'Source', Source)) by CorrelationId | mv-apply Checkpoints on ( - order by todatetime(Checkpoints[\''Time\'']) asc + order by todatetime(Checkpoints['Time']) asc | summarize Checkpoints=makelist(Checkpoints)) ) on CorrelationId | project-away CorrelationId1, CorrelationId2 | order by TimeGenerated desc - | where Errors[0].CodeSymbolic == "ConnectionFailedNoHealthyRdshAvailable"' + | where Errors[0].CodeSymbolic == "ConnectionFailedNoHealthyRdshAvailable" autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" From 785f6bbbb312d0bd323a958ceed2725a3e25114a Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:36:51 -0400 Subject: [PATCH 03/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 8a6cc530e..600851c40 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -655,7 +655,7 @@ "InvalidIndex")}; WVDAgentHealthStatus | where TimeGenerated > ago(10m) - | where Status != \''Available\'' + | where Status != 'Available' | where AllowNewSessions = True | extend CheckFailed = parse_json(SessionHostHealthCheckResult) | mv-expand CheckFailed @@ -663,7 +663,7 @@ | extend HealthCheckName = tolong(CheckFailed.HealthCheckName) | extend HealthCheckResult = tolong(CheckFailed.HealthCheckResult) | extend HealthCheckDesc = MapToDesc(HealthCheckName) - | where HealthCheckDesc 
!= \''InvalidIndex\'' + | where HealthCheckDesc != 'InvalidIndex' | parse _ResourceId with "/subscriptions/" subscription "/resourcegroups/" HostPoolResourceGroup "/providers/microsoft.desktopvirtualization/hostpools/" HostPool | parse SessionHostResourceId with "/subscriptions/" HostSubscription "/resourceGroups/" SessionHostRG " /providers/Microsoft.Compute/virtualMachines/" SessionHostName autoMitigate: true From 7f66de6259d87ea68ef8d8bef3cc5fee48d69fce Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:40:19 -0400 Subject: [PATCH 04/34] Update alerts.yaml --- .../hostPools/alerts.yaml | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 600851c40..0ee08a1ee 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -296,7 +296,7 @@ severity: 1 operator: GreaterThanOrEqual timeAggregation: Count - windowSize: PT1D + windowSize: PT24H evaluationFrequency: PT5M threshold: 1 dimensions: @@ -359,7 +359,7 @@ severity: 1 operator: GreaterThanOrEqual timeAggregation: Count - windowSize: PT1D + windowSize: PT24H evaluationFrequency: PT5M resourceIdColumn: _ResourceId threshold: 1 @@ -423,7 +423,7 @@ severity: 1 operator: GreaterThanOrEqual timeAggregation: Count - windowSize: PT1D + windowSize: PT24H evaluationFrequency: PT5M resourceIdColumn: _ResourceId threshold: 1 @@ -487,7 +487,7 @@ severity: 2 operator: GreaterThanOrEqual timeAggregation: Count - windowSize: PT1D + windowSize: PT24H evaluationFrequency: PT5M resourceIdColumn: _ResourceId threshold: 1 @@ -551,7 +551,7 @@ severity: 2 operator: GreaterThanOrEqual timeAggregation: Count - windowSize: PT1D + windowSize: PT24H evaluationFrequency: PT5M resourceIdColumn: _ResourceId threshold: 1 @@ -738,21 +738,21 @@ WVDConnections // | where 
UserName == "upn.here@contoso.com" | project-away TenantId,SourceSystem - | summarize arg_max(TimeGenerated, *), StartTime = min(iff(State==''Started'', TimeGenerated , datetime(null) )), ConnectTime = min(iff(State==''Connected'', TimeGenerated , datetime(null) )) by CorrelationId + | summarize arg_max(TimeGenerated, *), StartTime = min(iff(State=='Started', TimeGenerated , datetime(null) )), ConnectTime = min(iff(State=='Connected', TimeGenerated , datetime(null) )) by CorrelationId | join kind=leftouter (WVDErrors - |summarize Errors=make_list(pack(''Code'', Code, ''CodeSymbolic'', CodeSymbolic, ''Time'', TimeGenerated, ''Message'', Message ,''ServiceError'', ServiceError, ''Source'', Source)) by CorrelationId + |summarize Errors=make_list(pack('Code', Code, 'CodeSymbolic', CodeSymbolic, 'Time', TimeGenerated, 'Message', Message ,'ServiceError', ServiceError, 'Source', Source)) by CorrelationId ) on CorrelationId | join kind=leftouter (WVDCheckpoints - | summarize Checkpoints=make_list(pack(''Time'', TimeGenerated, ''Name'', Name, ''Parameters'', Parameters, ''Source'', Source)) by CorrelationId + | summarize Checkpoints=make_list(pack('Time', TimeGenerated, 'Name', Name, 'Parameters', Parameters, 'Source', Source)) by CorrelationId | mv-apply Checkpoints on ( - order by todatetime(Checkpoints[''Time'']) asc + order by todatetime(Checkpoints['Time']) asc | summarize Checkpoints=make_list(Checkpoints)) ) on CorrelationId | project-away CorrelationId1, CorrelationId2 | order by TimeGenerated desc | where TimeGenerated > ago(15m) - | extend ResourceGroup=tostring(split(_ResourceId, ''/'')[4]) - | extend HostPool=tostring(split(_ResourceId, ''/'')[8]) + | extend ResourceGroup=tostring(split(_ResourceId, '/')[4]) + | extend HostPool=tostring(split(_ResourceId, '/')[8]) | extend ErrorShort=tostring(Errors[0].CodeSymbolic) | extend ErrorMessage=tostring(Errors[0].Message) | project TimeGenerated, HostPool, ResourceGroup, UserName, ClientOS, ClientVersion, 
ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage From ad808f1366c5b906995e04a499950e58b1bf8744 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:46:59 -0400 Subject: [PATCH 05/34] Update alerts.yaml --- .../hostPools/alerts.yaml | 46 +++++++++++++++---- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 0ee08a1ee..e9e3fdb2f 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -159,7 +159,7 @@ properties: scope: Subscription multiResource: false -- name: FSLogix Profile less than 5% +- name: FSLogix Profile less than 5 Percent description: User Profiles Service logged Event ID 33. Expand User's Virtual Profile Disk and/or clean up user profile data on the VM. type: Log verified: true @@ -190,6 +190,10 @@ operator: Include values: - "*" + - name: _ResourceId + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 @@ -207,7 +211,7 @@ | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName | extend ComputerName=tolower(ComputerName) | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -222,7 +226,7 @@ properties: scope: Subscription multiResource: false -- name: FSLogix Profile less than 2% +- name: FSLogix Profile less than 2 Percent description: User Profiles Service logged Event ID 34. Expand User's Virtual Profile Disk and/or clean up user profile data on the VM. 
type: Log verified: true @@ -253,6 +257,10 @@ operator: Include values: - "*" + - name: _ResourceId + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 @@ -270,7 +278,7 @@ | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName | extend ComputerName=tolower(ComputerName) | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -316,6 +324,10 @@ operator: Include values: - "*" + - name: _ResourceId + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 @@ -333,7 +345,7 @@ | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName | extend ComputerName=tolower(ComputerName) | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -380,6 +392,10 @@ operator: Include values: - "*" + - name: _ResourceId + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 @@ -397,7 +413,7 @@ | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName | extend ComputerName=tolower(ComputerName) | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -444,6 +460,10 @@ operator: Include values: - "*" + - 
name: _ResourceId + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 @@ -461,7 +481,7 @@ | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName | extend ComputerName=tolower(ComputerName) | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -508,6 +528,10 @@ operator: Include values: - "*" + - name: _ResourceId + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 @@ -525,7 +549,7 @@ | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName | extend ComputerName=tolower(ComputerName) | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -572,6 +596,10 @@ operator: Include values: - "*" + - name: _ResourceId + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 @@ -589,7 +617,7 @@ | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName | extend ComputerName=tolower(ComputerName) | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true From 863057872b0497f1bb7e1c498b3b19eb091d0ac2 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> 
Date: Tue, 11 Jun 2024 09:51:03 -0400 Subject: [PATCH 06/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index e9e3fdb2f..eb92bfb39 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -723,10 +723,6 @@ resourceIdColumn: _ResourceId threshold: 1 dimensions: - - name: HostPool - operator: Include - values: - - "*" - name: ResourceGroup operator: Include values: @@ -759,6 +755,10 @@ operator: Include values: - "*" + - name: HostPool + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 From da4d0734c4693e4cea16bb1ed7354498e3c650fa Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:53:57 -0400 Subject: [PATCH 07/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index eb92bfb39..f8090186a 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -723,10 +723,6 @@ resourceIdColumn: _ResourceId threshold: 1 dimensions: - - name: ResourceGroup - operator: Include - values: - - "*" - name: UserName operator: Include values: @@ -759,6 +755,10 @@ operator: Include values: - "*" + - name: ResourceGroup + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 From c9fc300d663bcc266c7b1a9ebcb3aaee823cd600 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:55:38 -0400 Subject: [PATCH 
08/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index f8090186a..7856f5c38 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -743,19 +743,19 @@ operator: Include values: - "*" - - name: ErrorShort + - name: HostPool operator: Include values: - "*" - - name: ErrorMessage + - name: ResourceGroup operator: Include values: - "*" - - name: HostPool + - name: ErrorShort operator: Include values: - "*" - - name: ResourceGroup + - name: ErrorMessage operator: Include values: - "*" From c1f08f27092dba232a7f5eabd1a87dbd50e92d25 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:58:03 -0400 Subject: [PATCH 09/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 7856f5c38..6e8ff868f 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -743,10 +743,6 @@ operator: Include values: - "*" - - name: HostPool - operator: Include - values: - - "*" - name: ResourceGroup operator: Include values: @@ -759,6 +755,10 @@ operator: Include values: - "*" + - name: HostPool + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 From eaf35610f4bb485f0d7cf514fff16c66942be6fc Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:59:51 -0400 Subject: [PATCH 10/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 6e8ff868f..f8090186a 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -743,10 +743,6 @@ operator: Include values: - "*" - - name: ResourceGroup - operator: Include - values: - - "*" - name: ErrorShort operator: Include values: @@ -759,6 +755,10 @@ operator: Include values: - "*" + - name: ResourceGroup + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 From 90a96043607cc4039526c31920f1c90467a539a4 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:01:34 -0400 Subject: [PATCH 11/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index f8090186a..433069d78 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -743,10 +743,6 @@ operator: Include values: - "*" - - name: ErrorShort - operator: Include - values: - - "*" - name: ErrorMessage operator: Include values: @@ -759,6 +755,10 @@ operator: Include values: - "*" + - name: ErrorShort + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 From edeb0d2d1dfdb89d65a9a412667658f416298a72 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:04:17 -0400 Subject: [PATCH 12/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 433069d78..7856f5c38 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -743,10 +743,6 @@ operator: Include values: - "*" - - name: ErrorMessage - operator: Include - values: - - "*" - name: HostPool operator: Include values: @@ -759,6 +755,10 @@ operator: Include values: - "*" + - name: ErrorMessage + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 From 173b2f4f45d56e126c29c9b44f0ae60388c2c479 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:09:43 -0400 Subject: [PATCH 13/34] Update alerts.yaml --- .../DesktopVirtualization/hostPools/alerts.yaml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 7856f5c38..a039831ba 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -723,31 +723,30 @@ resourceIdColumn: _ResourceId threshold: 1 dimensions: - - name: UserName + - name: HostPool operator: Include values: - "*" - - name: ClientOS + - name: ResourceGroup operator: Include values: - "*" - - name: ClientVersion + - name: UserName operator: Include values: - "*" - - name: ClientSideIPAddress + - name: ClientOS operator: Include values: - "*" - - name: ConnectionType + - name: ClientVersion operator: Include values: - - "*" - - name: HostPool + - name: ClientSideIPAddress operator: Include values: - "*" - - name: ResourceGroup + - name: ConnectionType operator: Include values: - "*" From f3f405f6ae21bf586dd0cc1d4808584c140eaa47 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 
10:12:14 -0400 Subject: [PATCH 14/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index a039831ba..5404fe543 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -723,6 +723,10 @@ resourceIdColumn: _ResourceId threshold: 1 dimensions: + - name: TimeGenerated + operator: Include + values: + - "*" - name: HostPool operator: Include values: From e4d4f6ac4d6047a0064374c0fdcae0d0856d4aef Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:14:13 -0400 Subject: [PATCH 15/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 5404fe543..f85f5b6be 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -762,6 +762,10 @@ operator: Include values: - "*" + - name: _ResourceId + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 @@ -786,7 +790,7 @@ | extend HostPool=tostring(split(_ResourceId, '/')[8]) | extend ErrorShort=tostring(Errors[0].CodeSymbolic) | extend ErrorMessage=tostring(Errors[0].Message) - | project TimeGenerated, HostPool, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage + | project TimeGenerated, HostPool, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage, _ResourceId autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" From 1b9fc18980b4baece16b2883a51deae8dd1ca3ac 
Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:16:38 -0400 Subject: [PATCH 16/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index f85f5b6be..4fc547668 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -746,6 +746,7 @@ - name: ClientVersion operator: Include values: + - "*" - name: ClientSideIPAddress operator: Include values: From 2656f84a4280c72dac1c1f8bb4ca1d7f8c9e3de3 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:18:58 -0400 Subject: [PATCH 17/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 4fc547668..fbe60b8d0 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -723,10 +723,6 @@ resourceIdColumn: _ResourceId threshold: 1 dimensions: - - name: TimeGenerated - operator: Include - values: - - "*" - name: HostPool operator: Include values: From 34b0b6f588d526b622abad225b1b177a1b49e1d9 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:20:45 -0400 Subject: [PATCH 18/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index fbe60b8d0..aa2bebb8c 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ 
b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -723,10 +723,6 @@ resourceIdColumn: _ResourceId threshold: 1 dimensions: - - name: HostPool - operator: Include - values: - - "*" - name: ResourceGroup operator: Include values: From 320cfded309682d3e6ee15d55fd01a75d763dbd6 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:23:58 -0400 Subject: [PATCH 19/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index aa2bebb8c..59ba16ae0 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -723,6 +723,10 @@ resourceIdColumn: _ResourceId threshold: 1 dimensions: + - name: HostPool + operator: Include + values: + - "*" - name: ResourceGroup operator: Include values: @@ -783,7 +787,7 @@ | extend HostPool=tostring(split(_ResourceId, '/')[8]) | extend ErrorShort=tostring(Errors[0].CodeSymbolic) | extend ErrorMessage=tostring(Errors[0].Message) - | project TimeGenerated, HostPool, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage, _ResourceId + | project _ResourceId, HostPool, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" From af8b5d526cb407f3f8c420546647d28d9a29e8a9 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:25:41 -0400 Subject: [PATCH 20/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml 
b/services/DesktopVirtualization/hostPools/alerts.yaml index 59ba16ae0..a44b9c4fc 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -787,7 +787,7 @@ | extend HostPool=tostring(split(_ResourceId, '/')[8]) | extend ErrorShort=tostring(Errors[0].CodeSymbolic) | extend ErrorMessage=tostring(Errors[0].Message) - | project _ResourceId, HostPool, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage + | project _ResourceId, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage, HostPool autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" From a0eb8bdf0b0af15de6d51573d462128248c3eaab Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:27:18 -0400 Subject: [PATCH 21/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index a44b9c4fc..338b69457 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -723,10 +723,6 @@ resourceIdColumn: _ResourceId threshold: 1 dimensions: - - name: HostPool - operator: Include - values: - - "*" - name: ResourceGroup operator: Include values: @@ -763,6 +759,10 @@ operator: Include values: - "*" + - name: HostPool + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 From 430b6951fcbef0a45081bce5ada110923d0ca68e Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:29:48 -0400 Subject: [PATCH 22/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 6 +----- 1 
file changed, 1 insertion(+), 5 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 338b69457..99b798c50 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -759,10 +759,6 @@ operator: Include values: - "*" - - name: HostPool - operator: Include - values: - - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 @@ -787,7 +783,7 @@ | extend HostPool=tostring(split(_ResourceId, '/')[8]) | extend ErrorShort=tostring(Errors[0].CodeSymbolic) | extend ErrorMessage=tostring(Errors[0].Message) - | project _ResourceId, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage, HostPool + | project _ResourceId, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" From 093162c4a8a152a26fcd5d96791e4782684f5e40 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:33:34 -0400 Subject: [PATCH 23/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 99b798c50..8ab9648ea 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -723,10 +723,6 @@ resourceIdColumn: _ResourceId threshold: 1 dimensions: - - name: ResourceGroup - operator: Include - values: - - "*" - name: UserName operator: Include values: @@ -747,15 +743,11 @@ operator: Include values: - "*" - - name: ErrorShort - operator: Include - values: - - "*" - name: ErrorMessage operator: Include values: - "*" - - name: 
_ResourceId + - name: HostPool operator: Include values: - "*" From 4a829e5f89b6c77f1aba9c180ba30520ef77dc78 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:44:13 -0400 Subject: [PATCH 24/34] Update alerts.yaml --- .../hostPools/alerts.yaml | 40 +++++++++++++------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 8ab9648ea..6160c0c64 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -760,22 +760,36 @@ | project-away TenantId,SourceSystem | summarize arg_max(TimeGenerated, *), StartTime = min(iff(State=='Started', TimeGenerated , datetime(null) )), ConnectTime = min(iff(State=='Connected', TimeGenerated , datetime(null) )) by CorrelationId | join kind=leftouter (WVDErrors - |summarize Errors=make_list(pack('Code', Code, 'CodeSymbolic', CodeSymbolic, 'Time', TimeGenerated, 'Message', Message ,'ServiceError', ServiceError, 'Source', Source)) by CorrelationId - ) on CorrelationId - | join kind=leftouter (WVDCheckpoints + // Connection Errors + // List connection checkpoints and errors for each connection attempt, along with detailed information across all users. + //You can also uncomment the where clause to filter to a specific user if you are troubleshooting an issue. 
+ WVDConnections + //| where UserName == "upn.here@contoso.com" + | project-away TenantId, SourceSystem + | summarize + arg_max(TimeGenerated, *), + StartTime = min(iff(State == 'Started', TimeGenerated, datetime(null))), + ConnectTime = min(iff(State == 'Connected', TimeGenerated, datetime(null))) + by CorrelationId + | join kind=leftouter + ( + WVDErrors + | summarize Errors=make_list(pack('Code', Code, 'CodeSymbolic', CodeSymbolic, 'Time', TimeGenerated, 'Message', Message, 'ServiceError', ServiceError, 'Source', Source)) by CorrelationId + ) + on CorrelationId + | join kind=leftouter + ( + WVDCheckpoints | summarize Checkpoints=make_list(pack('Time', TimeGenerated, 'Name', Name, 'Parameters', Parameters, 'Source', Source)) by CorrelationId - | mv-apply Checkpoints on ( - order by todatetime(Checkpoints['Time']) asc - | summarize Checkpoints=make_list(Checkpoints)) - ) on CorrelationId + | mv-apply Checkpoints on + ( + order by todatetime(Checkpoints['Time']) asc + | summarize Checkpoints=make_list(Checkpoints) + ) + ) + on CorrelationId | project-away CorrelationId1, CorrelationId2 | order by TimeGenerated desc - | where TimeGenerated > ago(15m) - | extend ResourceGroup=tostring(split(_ResourceId, '/')[4]) - | extend HostPool=tostring(split(_ResourceId, '/')[8]) - | extend ErrorShort=tostring(Errors[0].CodeSymbolic) - | extend ErrorMessage=tostring(Errors[0].Message) - | project _ResourceId, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" From 230a72eb2088c6a96ad8f9940d95c0835cce28e3 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:48:57 -0400 Subject: [PATCH 25/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml 
b/services/DesktopVirtualization/hostPools/alerts.yaml index 6160c0c64..59b20beda 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -755,11 +755,6 @@ numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 query: >- - WVDConnections - // | where UserName == "upn.here@contoso.com" - | project-away TenantId,SourceSystem - | summarize arg_max(TimeGenerated, *), StartTime = min(iff(State=='Started', TimeGenerated , datetime(null) )), ConnectTime = min(iff(State=='Connected', TimeGenerated , datetime(null) )) by CorrelationId - | join kind=leftouter (WVDErrors // Connection Errors // List connection checkpoints and errors for each connection attempt, along with detailed information across all users. //You can also uncomment the where clause to filter to a specific user if you are troubleshooting an issue. From 01c88b28a245387331593db5c940aa4645321b77 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 11:00:09 -0400 Subject: [PATCH 26/34] Update alerts.yaml --- .../hostPools/alerts.yaml | 127 +++++++++--------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 59b20beda..1ca5a08ca 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -207,11 +207,11 @@ | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated | join kind = leftouter (WVDAgentHealthStatus - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend 
ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool, _ResourceId + | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool + | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName + | extend ComputerName=tolower(ComputerName) + | summarize arg_max(TimeGenerated,*) by ComputerName + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -274,11 +274,11 @@ | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated | join kind = leftouter (WVDAgentHealthStatus - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool, _ResourceId + | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool + | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName + | extend ComputerName=tolower(ComputerName) + | summarize arg_max(TimeGenerated,*) by ComputerName + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -341,11 +341,11 @@ | project ComputerName, 
RenderedDescription, subscription, ResourceGroup, TimeGenerated | join kind = leftouter (WVDAgentHealthStatus - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool, _ResourceId + | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool + | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName + | extend ComputerName=tolower(ComputerName) + | summarize arg_max(TimeGenerated,*) by ComputerName + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -409,11 +409,11 @@ | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated | join kind = leftouter (WVDAgentHealthStatus - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool, _ResourceId + | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth 
"/providers/microsoft.desktopvirtualization/hostpools/" HostPool + | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName + | extend ComputerName=tolower(ComputerName) + | summarize arg_max(TimeGenerated,*) by ComputerName + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -477,11 +477,11 @@ | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated | join kind = leftouter (WVDAgentHealthStatus - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool, _ResourceId + | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool + | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName + | extend ComputerName=tolower(ComputerName) + | summarize arg_max(TimeGenerated,*) by ComputerName + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -545,11 +545,11 @@ | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated | join kind = leftouter (WVDAgentHealthStatus - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth 
"/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool, _ResourceId + | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool + | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName + | extend ComputerName=tolower(ComputerName) + | summarize arg_max(TimeGenerated,*) by ComputerName + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -613,11 +613,11 @@ | project ComputerName, RenderedDescription, subscription, ResourceGroup, TimeGenerated | join kind = leftouter (WVDAgentHealthStatus - | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool - | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName - | extend ComputerName=tolower(ComputerName) - | summarize arg_max(TimeGenerated,*) by ComputerName - | project VMresourceGroup, ComputerName, HostPool, _ResourceId + | parse _ResourceId with "/subscriptions/" subscriptionAgentHealth "/resourcegroups/" ResourceGroupAgentHealth "/providers/microsoft.desktopvirtualization/hostpools/" HostPool + | parse SessionHostResourceId with "/subscriptions/" VMsubscription "/resourceGroups/" VMresourceGroup "/providers/Microsoft.Compute/virtualMachines/" ComputerName + | extend 
ComputerName=tolower(ComputerName) + | summarize arg_max(TimeGenerated,*) by ComputerName + | project VMresourceGroup, ComputerName, HostPool, _ResourceId ) on ComputerName autoMitigate: true autoResolve: true @@ -723,6 +723,14 @@ resourceIdColumn: _ResourceId threshold: 1 dimensions: + - name: HostPool + operator: Include + values: + - "*" + - name: ResourceGroup + operator: Include + values: + - "*" - name: UserName operator: Include values: @@ -743,11 +751,15 @@ operator: Include values: - "*" + - name: ErrorShort + operator: Include + values: + - "*" - name: ErrorMessage operator: Include values: - "*" - - name: HostPool + - name: _ResourceId operator: Include values: - "*" @@ -755,36 +767,27 @@ numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 query: >- - // Connection Errors - // List connection checkpoints and errors for each connection attempt, along with detailed information across all users. - //You can also uncomment the where clause to filter to a specific user if you are troubleshooting an issue. 
WVDConnections - //| where UserName == "upn.here@contoso.com" - | project-away TenantId, SourceSystem - | summarize - arg_max(TimeGenerated, *), - StartTime = min(iff(State == 'Started', TimeGenerated, datetime(null))), - ConnectTime = min(iff(State == 'Connected', TimeGenerated, datetime(null))) - by CorrelationId - | join kind=leftouter - ( - WVDErrors - | summarize Errors=make_list(pack('Code', Code, 'CodeSymbolic', CodeSymbolic, 'Time', TimeGenerated, 'Message', Message, 'ServiceError', ServiceError, 'Source', Source)) by CorrelationId - ) - on CorrelationId - | join kind=leftouter - ( - WVDCheckpoints - | summarize Checkpoints=make_list(pack('Time', TimeGenerated, 'Name', Name, 'Parameters', Parameters, 'Source', Source)) by CorrelationId - | mv-apply Checkpoints on - ( + // | where UserName == "upn.here@contoso.com" + | project-away TenantId,SourceSystem + | summarize arg_max(TimeGenerated, *), StartTime = min(iff(State=='Started', TimeGenerated , datetime(null))), ConnectTime = min(iff(State=='Connected', TimeGenerated , datetime(null))) by CorrelationId + | join kind=leftouter (WVDErrors + |summarize Errors=make_list(pack('Code', Code, 'CodeSymbolic', CodeSymbolic, 'Time', TimeGenerated, 'Message', Message ,'ServiceError', ServiceError, 'Source', Source)) by CorrelationId + ) on CorrelationId + | join kind=leftouter (WVDCheckpoints + | summarize Checkpoints=make_list(pack('Time', TimeGenerated, 'Name', Name, 'Parameters', Parameters, 'Source', Source)) by CorrelationId + | mv-apply Checkpoints on ( order by todatetime(Checkpoints['Time']) asc - | summarize Checkpoints=make_list(Checkpoints) - ) - ) - on CorrelationId + | summarize Checkpoints=make_list(Checkpoints)) + ) on CorrelationId | project-away CorrelationId1, CorrelationId2 | order by TimeGenerated desc + | where TimeGenerated > ago(15m) + | extend ResourceGroup=tostring(split(_ResourceId, '/')[4]) + | extend HostPool=tostring(split(_ResourceId, '/')[8]) + | extend 
ErrorShort=tostring(Errors[0].CodeSymbolic) + | extend ErrorMessage=tostring(Errors[0].Message) + | project _ResourceId, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage, HostPool autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" From 351062456190ec528dcd7136855d88a7fd962d67 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 17:36:38 -0400 Subject: [PATCH 27/34] Compute Alerts YAML - Add Disk to 3 alerts Heartbeat Network Read (bytes/sec) Network Write (bytes/sec) --- services/Compute/virtualMachines/alerts.yaml | 87 +++++++++++++------- 1 file changed, 56 insertions(+), 31 deletions(-) diff --git a/services/Compute/virtualMachines/alerts.yaml b/services/Compute/virtualMachines/alerts.yaml index 8b883a85a..8e8e72eae 100644 --- a/services/Compute/virtualMachines/alerts.yaml +++ b/services/Compute/virtualMachines/alerts.yaml @@ -30,6 +30,7 @@ properties: scope: Resource multiResource: false + guid: b0bd7c37-eb24-47c2-a032-f925594152ed - name: Data Disk Read Latency (ms) description: Log Alert for Virtual Machine Data Disk Read Latency (ms) type: Log @@ -88,6 +89,7 @@ properties: scope: Subscription multiResource: false + guid: abddd643-e7c7-411c-b13a-64fdbb406cc8 - name: Data Disk Free Space Percentage description: Log Alert for Virtual Machine Data Disk Free Space Percentage type: Log @@ -146,6 +148,7 @@ properties: scope: Subscription multiResource: false + guid: 5dbf3a3d-7f08-4deb-a153-129c3485da84 - name: Data Disk Write Latency (ms) description: Log Alert for Virtual Machine Data Disk Write Latency (ms) type: Log @@ -204,6 +207,7 @@ properties: scope: Subscription multiResource: false + guid: 5c51e187-5b5d-428d-98d8-fb9ab4fea646 - name: Heartbeat description: Log Alert for Virtual Machine Heartbeat type: Log @@ -230,16 +234,11 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'Heartbeat - + 
query: >- + Heartbeat | summarize TimeGenerated=max(TimeGenerated) by Computer, _ResourceId - | extend Duration = datetime_diff(''minute'',now(),TimeGenerated) - - | summarize AggregatedValue = min(Duration) by Computer, bin(TimeGenerated,5m), - _ResourceId - - ' + | summarize AggregatedValue = min(Duration) by Computer, bin(TimeGenerated,5m), _ResourceId, Disk autoMitigate: true autoResolve: true autoResolveTime: 0:10:00 @@ -264,6 +263,7 @@ properties: scope: Resource multiResource: false + guid: b3a5f580-77a6-4161-9a1e-7370ce783e67 - name: Network Read (bytes/sec) description: Log Alert for Virtual Machine Network Read (bytes/sec) type: Log @@ -293,18 +293,12 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'InsightsMetrics - + query: >- + InsightsMetrics | where Origin == "vm.azm.ms" - | where Namespace == "Network" and Name == "ReadBytesPerSecond" - | extend NetworkInterface=tostring(todynamic(Tags)["vm.azm.ms/networkDeviceId"]) - - | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, - _ResourceId, NetworkInterface - - ' + | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface, Disk autoMitigate: true autoResolve: true autoResolveTime: 0:10:00 @@ -320,6 +314,7 @@ properties: scope: Subscription multiResource: false + guid: b565e73f-71c8-4bb3-a792-903b67775497 - name: Network Write (bytes/sec) description: Log Alert for Virtual Machine Network Write (bytes/sec) type: Log @@ -349,18 +344,12 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'InsightsMetrics - + query: >- + InsightsMetrics | where Origin == "vm.azm.ms" - | where Namespace == "Network" and Name == "WriteBytesPerSecond" - | extend NetworkInterface=tostring(todynamic(Tags)["vm.azm.ms/networkDeviceId"]) - - | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, - _ResourceId, NetworkInterface - - ' + | summarize AggregatedValue = 
avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface, Disk autoMitigate: true autoResolve: true autoResolveTime: 0:10:00 @@ -376,6 +365,7 @@ properties: scope: Subscription multiResource: false + guid: 552ca1f1-d69d-4bc2-b044-19f81b225fd4 - name: OS Disk Read Latency (ms) description: Log Alert for Virtual Machine Data OS Read Latency (ms) type: Log @@ -432,6 +422,7 @@ properties: scope: Subscription multiResource: false + guid: 299ce963-728a-48c8-8b7d-e05b5f8a8f38 - name: OS Disk Free Space Percentage description: Log Alert for Virtual Machine OS Disk Free Space Percentage type: Log @@ -488,6 +479,7 @@ properties: scope: Subscription multiResource: false + guid: dca7af9f-cad2-4751-a61f-77d92c2ce523 - name: OS Disk Write Latency (ms) description: Log Alert for Virtual Machine OS Disk Write Latency (ms) type: Log @@ -542,6 +534,7 @@ properties: scope: Subscription multiResource: false + guid: 37d9da12-88e6-4c01-8772-384920d34458 - name: Processor Utilization Percentage description: Log Alert for Virtual Machine Processor Utilization Percentage type: Log @@ -592,6 +585,7 @@ properties: scope: Subscription multiResource: false + guid: 56d57b79-b7c3-4a17-a96b-3fc7152d9c5c - name: Available Memory Percentage description: Log Alert for Virtual Machine Available Memory Percentage type: Log @@ -646,13 +640,14 @@ properties: scope: Subscription multiResource: false + guid: 69912da3-8d8d-4d57-884a-97c2bdd03bdd - name: Percentage CPU description: The percentage of allocated compute units that are currently in use by the Virtual Machine(s) type: Metric verified: true visible: true - tags: + tags: null properties: metricName: Percentage CPU metricNamespace: Microsoft.Compute/virtualMachines @@ -663,13 +658,14 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 80 + guid: a9bac9fd-2382-4ce1-b68b-8898caf45038 - name: Data Disk IOPS Consumed Percentage description: Percentage of data disk I/Os consumed per minute. 
Only available on VM series that support premium storage. type: Metric verified: true visible: true - tags: + tags: null properties: metricName: Data Disk IOPS Consumed Percentage metricNamespace: Microsoft.Compute/virtualMachines @@ -680,13 +676,14 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 95.0 + guid: b0cbb9f8-cc67-4e7d-95a8-5a058c6de7e0 - name: OS Disk IOPS Consumed Percentage description: Percentage of operating system disk I/Os consumed per minute. Only available on VM series that support premium storage. type: Metric verified: true visible: true - tags: + tags: null properties: metricName: OS Disk IOPS Consumed Percentage metricNamespace: Microsoft.Compute/virtualMachines @@ -697,6 +694,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 95.0 + guid: a2bf3c43-d327-473b-9204-f77e2a0fe398 - name: Available Memory Bytes description: Amount of physical memory, in bytes, immediately available for allocation to a process or for system use in the Virtual Machine @@ -717,13 +715,14 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 1000000000 + guid: bf1e0006-6089-4f92-a115-fc83aa0fbdd5 - name: Network In Total description: The number of bytes received on all network interfaces by the Virtual Machine(s) (Incoming Traffic) type: Metric verified: true visible: true - tags: + tags: null properties: metricName: Network In Total metricNamespace: Microsoft.Compute/virtualMachines @@ -734,13 +733,14 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 500000000000 + guid: ea4501a3-1e77-4df1-ab61-3ee28ae529eb - name: Network Out Total description: The number of bytes out on all network interfaces by the Virtual Machine(s) (Outgoing Traffic) type: Metric verified: true visible: true - tags: + tags: null properties: metricName: Network Out Total metricNamespace: Microsoft.Compute/virtualMachines @@ -751,6 +751,7 @@ operator: GreaterThan criterionType: 
StaticThresholdCriterion threshold: 200000000000 + guid: e2028144-b142-445c-b544-3bb438537c8f - name: VmAvailabilityMetric description: Measure of Availability of Virtual machines over time. type: Metric @@ -770,6 +771,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 1 + guid: 3fa5376a-705a-48e4-b314-6282a74c9f7c - name: OS Disk Bandwidth Consumed Percentage description: Percentage of operating system disk bandwidth consumed per minute. Only available on VM series that support premium storage. @@ -789,6 +791,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + guid: f7e19635-0118-4040-83d5-2f4c2150aef1 - name: Inbound Flows description: Inbound Flows are number of current flows in the inbound direction (traffic going into the VM) @@ -808,6 +811,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100000.0 + guid: 6de4e570-4270-4e9b-949e-5680b061e7fd - name: Data Disk Bandwidth Consumed Percentage description: Percentage of data disk bandwidth consumed per minute. Only available on VM series that support premium storage. @@ -827,6 +831,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + guid: b57b0749-f851-4c3d-b7fb-7cf26f3bf16a - name: Outbound Flows description: Outbound Flows are number of current flows in the outbound direction (traffic going out of the VM) @@ -846,6 +851,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100000.0 + guid: a477bf96-4b0f-471c-b5d3-0acdb59612e6 - name: Data Disk Queue Depth description: Data Disk Queue Depth(or Queue Length) type: Metric @@ -866,6 +872,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100.0 + guid: 040bc9c3-da5c-4fd0-b160-979ce89364ae - name: CPU Credits Remaining description: Total number of credits available to burst. 
Only available on B-series burstable VMs @@ -885,6 +892,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 5.0 + guid: 821888a1-6490-4a2d-8850-bdc45057a853 - name: OS Disk Queue Depth description: OS Disk Queue Depth(or Queue Length) type: Metric @@ -903,6 +911,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 10.0 + guid: 36262245-4c8a-4143-9ab4-68e9c23ae19a - name: VM Cached IOPS Consumed Percentage description: Percentage of cached disk IOPS consumed by the VM. Only available on VM series that support premium storage. @@ -922,6 +931,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + guid: c878115f-2a89-446e-8980-ef4f152120c5 - name: VM Cached Bandwidth Consumed Percentage description: Percentage of cached disk bandwidth consumed by the VM. Only available on VM series that support premium storage. @@ -941,6 +951,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + guid: eb85f15b-b705-418f-b8ed-93d8cc7a9a6c - name: VM Uncached IOPS Consumed Percentage description: Percentage of uncached disk IOPS consumed by the VM. Only available on VM series that support premium storage. @@ -960,6 +971,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + guid: 765ceb5b-ec1d-44d7-b94d-bb139030df81 - name: VM Uncached Bandwidth Consumed Percentage description: Percentage of uncached disk bandwidth consumed by the VM. Only available on VM series that support premium storage. 
@@ -979,6 +991,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + guid: e3981e7b-fd8b-4e07-bb68-d598c020c12d - name: Disk Write Operations/Sec description: Disk Write IOPS type: Metric @@ -997,6 +1010,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 10.0 + guid: edff41cb-d9b8-46ba-ba39-42747c1a4c4b - name: OS Disk Write Bytes/sec description: Bytes/Sec written to a single disk during monitoring period for OS disk @@ -1016,6 +1030,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 48000000.0 + guid: 929b095e-5ea1-48b4-bf4e-bfc1a941a908 - name: CPU Credits Consumed description: Total number of credits consumed by the Virtual Machine. Only available on B-series burstable VMs @@ -1035,6 +1050,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 80.0 + guid: a407ae10-6263-449c-81b9-172760d6dc6d - name: Data Disk Write Bytes/sec description: Bytes/Sec written to a single disk during monitoring period type: Metric @@ -1053,6 +1069,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 10000000.0 + guid: e45d685d-14c4-4422-8096-6f11d628fb20 - name: Disk Read Operations/Sec description: Disk Read IOPS type: Metric @@ -1071,6 +1088,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 400.0 + guid: 8bc489c0-d2f7-43c1-9bb7-478c9503fb2e - name: Data Disk Max Burst IOPS description: Maximum IOPS Data Disk can achieve with bursting type: Metric @@ -1089,6 +1107,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 3750.0 + guid: 9b0f41af-1c52-4890-a23a-3bfebfee1154 - name: Data Disk Read Operations/Sec description: Read IOPS from a single disk during monitoring period type: Metric @@ -1107,6 +1126,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100.0 + guid: 3c5518ea-9a0f-44ff-b197-47b3d6db060b - name: Disk Write Bytes description: Bytes written to disk during 
monitoring period type: Metric @@ -1125,6 +1145,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 3500000000.0 + guid: b1cc650c-24fe-4f9e-a2ec-5757816526c0 - name: Data Disk Write Operations/Sec description: Write IOPS from a single disk during monitoring period type: Metric @@ -1143,6 +1164,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 50.0 + guid: 326b359d-e0a2-4055-8e1f-f9c5f9df5599 - name: OS Disk Max Burst IOPS description: Maximum IOPS OS Disk can achieve with bursting type: Metric @@ -1161,6 +1183,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 3500.0 + guid: 916dc60b-b2d2-4708-9fa4-6a36b244f499 - name: Disk Read Bytes description: Bytes read from disk during monitoring period type: Metric @@ -1179,6 +1202,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 4000000000.0 + guid: b9221998-f2bb-4ae8-b2c8-f9c4750e06f7 - name: OS Disk Write Operations/Sec description: Write IOPS from a single disk during monitoring period for OS disk type: Metric @@ -1197,3 +1221,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 200.0 + guid: 3be4037a-c692-402d-843d-b3fe43053edf From 8b43bbc43938173a0b5c631580d2d5ab43168abd Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 17:47:21 -0400 Subject: [PATCH 28/34] Update alerts.yaml --- services/Compute/virtualMachines/alerts.yaml | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/services/Compute/virtualMachines/alerts.yaml b/services/Compute/virtualMachines/alerts.yaml index 8e8e72eae..382796693 100644 --- a/services/Compute/virtualMachines/alerts.yaml +++ b/services/Compute/virtualMachines/alerts.yaml @@ -227,17 +227,13 @@ operator: Include values: - '*' - - name: Disk - operator: Include - values: - - '*' failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 query: >- 
Heartbeat | summarize TimeGenerated=max(TimeGenerated) by Computer, _ResourceId - | extend Duration = datetime_diff(''minute'',now(),TimeGenerated) + | extend Duration = datetime_diff('minute',now(),TimeGenerated) | summarize AggregatedValue = min(Duration) by Computer, bin(TimeGenerated,5m), _ResourceId, Disk autoMitigate: true autoResolve: true @@ -286,10 +282,6 @@ operator: Include values: - '*' - - name: Disk - operator: Include - values: - - '*' failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 @@ -298,7 +290,7 @@ | where Origin == "vm.azm.ms" | where Namespace == "Network" and Name == "ReadBytesPerSecond" | extend NetworkInterface=tostring(todynamic(Tags)["vm.azm.ms/networkDeviceId"]) - | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface, Disk + | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface autoMitigate: true autoResolve: true autoResolveTime: 0:10:00 @@ -337,10 +329,6 @@ operator: Include values: - '*' - - name: Disk - operator: Include - values: - - '*' failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 @@ -349,7 +337,7 @@ | where Origin == "vm.azm.ms" | where Namespace == "Network" and Name == "WriteBytesPerSecond" | extend NetworkInterface=tostring(todynamic(Tags)["vm.azm.ms/networkDeviceId"]) - | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface, Disk + | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface autoMitigate: true autoResolve: true autoResolveTime: 0:10:00 From aa9598a6eb8b874a77cb7598891fcc3665211bc9 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 17:49:48 -0400 Subject: [PATCH 29/34] Update alerts.yaml --- services/Compute/virtualMachines/alerts.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/services/Compute/virtualMachines/alerts.yaml b/services/Compute/virtualMachines/alerts.yaml index 382796693..d0e0d9463 100644 --- a/services/Compute/virtualMachines/alerts.yaml +++ b/services/Compute/virtualMachines/alerts.yaml @@ -234,7 +234,7 @@ Heartbeat | summarize TimeGenerated=max(TimeGenerated) by Computer, _ResourceId | extend Duration = datetime_diff('minute',now(),TimeGenerated) - | summarize AggregatedValue = min(Duration) by Computer, bin(TimeGenerated,5m), _ResourceId, Disk + | summarize AggregatedValue = min(Duration) by Computer, bin(TimeGenerated,5m), _ResourceId autoMitigate: true autoResolve: true autoResolveTime: 0:10:00 From 8f17e83f7d4522e2e43fbd742294bde89ef64404 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 18:00:16 -0400 Subject: [PATCH 30/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 1ca5a08ca..6ba31915a 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -759,10 +759,6 @@ operator: Include values: - "*" - - name: _ResourceId - operator: Include - values: - - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 @@ -787,7 +783,7 @@ | extend HostPool=tostring(split(_ResourceId, '/')[8]) | extend ErrorShort=tostring(Errors[0].CodeSymbolic) | extend ErrorMessage=tostring(Errors[0].Message) - | project _ResourceId, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage, HostPool + | project ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage, HostPool autoMitigate: true autoResolve: true autoResolveTime: "0:30:00" From 
776fe4a78fa8c4ad6119e6ff345aec4405c0dab0 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 22:27:08 -0400 Subject: [PATCH 31/34] Update alerts.yaml --- .../hostPools/alerts.yaml | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 6ba31915a..b49e6611d 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -723,42 +723,42 @@ resourceIdColumn: _ResourceId threshold: 1 dimensions: - - name: HostPool - operator: Include - values: - - "*" - name: ResourceGroup operator: Include values: - - "*" + - "*" - name: UserName operator: Include values: - - "*" + - "*" - name: ClientOS operator: Include values: - - "*" + - "*" - name: ClientVersion operator: Include values: - - "*" + - "*" - name: ClientSideIPAddress operator: Include values: - - "*" + - "*" - name: ConnectionType operator: Include values: - - "*" + - "*" - name: ErrorShort operator: Include values: - - "*" + - "*" - name: ErrorMessage operator: Include values: - - "*" + - "*" + - name: HostPool + operator: Include + values: + - "*" failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 From a935f5b09b680ecf5f2a01c668dbde4cc97b161e Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 22:36:24 -0400 Subject: [PATCH 32/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index b49e6611d..fa48cb581 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -718,8 +718,8 @@ severity: 
3 operator: GreaterThanOrEqual timeAggregation: Count - windowSize: PT5M - evaluationFrequency: PT5M + windowSize: PT15M + evaluationFrequency: PT15M resourceIdColumn: _ResourceId threshold: 1 dimensions: @@ -743,14 +743,6 @@ operator: Include values: - "*" - - name: ConnectionType - operator: Include - values: - - "*" - - name: ErrorShort - operator: Include - values: - - "*" - name: ErrorMessage operator: Include values: From c37e52a5eb22d3eb110de0cbcda64b6df6fe0ac3 Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Tue, 11 Jun 2024 22:47:11 -0400 Subject: [PATCH 33/34] Update alerts.yaml --- services/DesktopVirtualization/hostPools/alerts.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index fa48cb581..1fadfb268 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ -756,7 +756,6 @@ minFailingPeriodsToAlert: 1 query: >- WVDConnections - // | where UserName == "upn.here@contoso.com" | project-away TenantId,SourceSystem | summarize arg_max(TimeGenerated, *), StartTime = min(iff(State=='Started', TimeGenerated , datetime(null))), ConnectTime = min(iff(State=='Connected', TimeGenerated , datetime(null))) by CorrelationId | join kind=leftouter (WVDErrors From 135fe347dd2d4a293fcf9b8a6fda134675807dff Mon Sep 17 00:00:00 2001 From: Jonathan Core <56272039+JCoreMS@users.noreply.github.com> Date: Thu, 13 Jun 2024 12:12:27 -0400 Subject: [PATCH 34/34] Update alerts.yaml --- .../hostPools/alerts.yaml | 57 +++++++++++-------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/services/DesktopVirtualization/hostPools/alerts.yaml b/services/DesktopVirtualization/hostPools/alerts.yaml index 1fadfb268..2c6a296e4 100644 --- a/services/DesktopVirtualization/hostPools/alerts.yaml +++ b/services/DesktopVirtualization/hostPools/alerts.yaml @@ 
-716,10 +716,11 @@ - avd properties: severity: 3 + enabled: true + evaluationFrequency: PT5M operator: GreaterThanOrEqual timeAggregation: Count - windowSize: PT15M - evaluationFrequency: PT15M + windowSize: PT5M resourceIdColumn: _ResourceId threshold: 1 dimensions: @@ -743,6 +744,14 @@ operator: Include values: - "*" + - name: ConnectionType + operator: Include + values: + - "*" + - name: ErrorShort + operator: Include + values: + - "*" - name: ErrorMessage operator: Include values: @@ -754,28 +763,28 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: >- - WVDConnections - | project-away TenantId,SourceSystem - | summarize arg_max(TimeGenerated, *), StartTime = min(iff(State=='Started', TimeGenerated , datetime(null))), ConnectTime = min(iff(State=='Connected', TimeGenerated , datetime(null))) by CorrelationId - | join kind=leftouter (WVDErrors - |summarize Errors=make_list(pack('Code', Code, 'CodeSymbolic', CodeSymbolic, 'Time', TimeGenerated, 'Message', Message ,'ServiceError', ServiceError, 'Source', Source)) by CorrelationId - ) on CorrelationId - | join kind=leftouter (WVDCheckpoints - | summarize Checkpoints=make_list(pack('Time', TimeGenerated, 'Name', Name, 'Parameters', Parameters, 'Source', Source)) by CorrelationId - | mv-apply Checkpoints on ( - order by todatetime(Checkpoints['Time']) asc - | summarize Checkpoints=make_list(Checkpoints)) - ) on CorrelationId - | project-away CorrelationId1, CorrelationId2 - | order by TimeGenerated desc - | where TimeGenerated > ago(15m) - | extend ResourceGroup=tostring(split(_ResourceId, '/')[4]) - | extend HostPool=tostring(split(_ResourceId, '/')[8]) - | extend ErrorShort=tostring(Errors[0].CodeSymbolic) - | extend ErrorMessage=tostring(Errors[0].Message) - | project ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage, HostPool - autoMitigate: true + query: |2 + WVDConnections + // | where UserName == 
"upn.here@contoso.com" + | project-away TenantId,SourceSystem + | summarize arg_max(TimeGenerated, *), StartTime = min(iff(State=='Started', TimeGenerated , datetime(null))), ConnectTime = min(iff(State=='Connected', TimeGenerated , datetime(null))) by CorrelationId + | join kind=leftouter (WVDErrors + |summarize Errors=make_list(pack('Code', Code, 'CodeSymbolic', CodeSymbolic, 'Time', TimeGenerated, 'Message', Message ,'ServiceError', ServiceError, 'Source', Source)) by CorrelationId + ) on CorrelationId + | join kind=leftouter (WVDCheckpoints + | summarize Checkpoints=make_list(pack('Time', TimeGenerated, 'Name', Name, 'Parameters', Parameters, 'Source', Source)) by CorrelationId + | mv-apply Checkpoints on ( + order by todatetime(Checkpoints['Time']) asc + | summarize Checkpoints=make_list(Checkpoints)) + ) on CorrelationId + | project-away CorrelationId1, CorrelationId2 + | order by TimeGenerated desc + | where TimeGenerated > ago(15m) + | extend ResourceGroup=tostring(split(_ResourceId, '/')[4]) + | extend HostPool=tostring(split(_ResourceId, '/')[8]) + | extend ErrorShort=tostring(Errors[0].CodeSymbolic) + | extend ErrorMessage=tostring(Errors[0].Message) + | project _ResourceId, ResourceGroup, UserName, ClientOS, ClientVersion, ClientSideIPAddress, ConnectionType, ErrorShort, ErrorMessage, HostPool autoResolve: true autoResolveTime: "0:30:00" references: