From d4db89e26aed573532d91590953d03799e5deaf1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 6 Dec 2024 16:22:05 +0000 Subject: [PATCH] [GitHub Action - Generate Templates] Generate templates for alerts --- ..._67300599-c61e-48d2-b47d-979b2254d494.json | 256 ++++++++++++++++++ ..._2030d931-6431-4134-9a03-106ebb83cb2d.json | 256 ++++++++++++++++++ ..._6aff7331-001f-4ee6-b5d2-1fc43b18b7de.json | 256 ++++++++++++++++++ ..._c4226730-ae59-4607-bddc-03b91dad1c4b.json | 256 ++++++++++++++++++ ..._7ade7362-3df1-4ad2-a000-78fa7b3d9b27.json | 256 ++++++++++++++++++ ..._5e223d13-112e-4f84-82ba-e03a76f6350f.json | 256 ++++++++++++++++++ ..._bb1969d8-eaa2-45b6-bd9f-09348b1ee346.json | 256 ++++++++++++++++++ ..._6a96dc94-674f-4a9c-830e-a0a4f7383646.json | 256 ++++++++++++++++++ ..._4b62cf77-069b-42a3-9608-8cd585a640f1.json | 256 ++++++++++++++++++ ..._50ee79d0-a321-4217-8f5b-b096654ad1ce.json | 256 ++++++++++++++++++ ..._639014b8-0ac6-4ae1-a887-bb3979669287.json | 256 ++++++++++++++++++ ...67300599-c61e-48d2-b47d-979b2254d494.bicep | 173 ++++++++++++ ...2030d931-6431-4134-9a03-106ebb83cb2d.bicep | 178 ++++++++++++ ...6aff7331-001f-4ee6-b5d2-1fc43b18b7de.bicep | 178 ++++++++++++ ...c4226730-ae59-4607-bddc-03b91dad1c4b.bicep | 178 ++++++++++++ ...7ade7362-3df1-4ad2-a000-78fa7b3d9b27.bicep | 178 ++++++++++++ ...5e223d13-112e-4f84-82ba-e03a76f6350f.bicep | 178 ++++++++++++ ...bb1969d8-eaa2-45b6-bd9f-09348b1ee346.bicep | 178 ++++++++++++ ...6a96dc94-674f-4a9c-830e-a0a4f7383646.bicep | 178 ++++++++++++ ...4b62cf77-069b-42a3-9608-8cd585a640f1.bicep | 178 ++++++++++++ ...50ee79d0-a321-4217-8f5b-b096654ad1ce.bicep | 178 ++++++++++++ ...639014b8-0ac6-4ae1-a887-bb3979669287.bicep | 173 ++++++++++++ 22 files changed, 4764 insertions(+) create mode 100644 services/HybridCompute/machines/templates/arm/HybridMachineAvailableMemoryPercentage_67300599-c61e-48d2-b47d-979b2254d494.json create mode 100644 
services/HybridCompute/machines/templates/arm/HybridMachineDataDiskFreeSpacePercentageAlert_2030d931-6431-4134-9a03-106ebb83cb2d.json create mode 100644 services/HybridCompute/machines/templates/arm/HybridMachineDataDiskReadLatencyAlert_6aff7331-001f-4ee6-b5d2-1fc43b18b7de.json create mode 100644 services/HybridCompute/machines/templates/arm/HybridMachineDataDiskWriteLatencyAlert_c4226730-ae59-4607-bddc-03b91dad1c4b.json create mode 100644 services/HybridCompute/machines/templates/arm/HybridMachineHeartbeatAlert_7ade7362-3df1-4ad2-a000-78fa7b3d9b27.json create mode 100644 services/HybridCompute/machines/templates/arm/HybridMachineNetworkReadAlert_5e223d13-112e-4f84-82ba-e03a76f6350f.json create mode 100644 services/HybridCompute/machines/templates/arm/HybridMachineNetworkWriteAlert_bb1969d8-eaa2-45b6-bd9f-09348b1ee346.json create mode 100644 services/HybridCompute/machines/templates/arm/HybridMachineOSDiskFreeSpacePercentage_6a96dc94-674f-4a9c-830e-a0a4f7383646.json create mode 100644 services/HybridCompute/machines/templates/arm/HybridMachineOSDiskReadLatencyAlert_4b62cf77-069b-42a3-9608-8cd585a640f1.json create mode 100644 services/HybridCompute/machines/templates/arm/HybridMachineOSDiskWriteLatencyms_50ee79d0-a321-4217-8f5b-b096654ad1ce.json create mode 100644 services/HybridCompute/machines/templates/arm/HybridMachineProcessorUtilizationPercentage_639014b8-0ac6-4ae1-a887-bb3979669287.json create mode 100644 services/HybridCompute/machines/templates/bicep/HybridMachineAvailableMemoryPercentage_67300599-c61e-48d2-b47d-979b2254d494.bicep create mode 100644 services/HybridCompute/machines/templates/bicep/HybridMachineDataDiskFreeSpacePercentageAlert_2030d931-6431-4134-9a03-106ebb83cb2d.bicep create mode 100644 services/HybridCompute/machines/templates/bicep/HybridMachineDataDiskReadLatencyAlert_6aff7331-001f-4ee6-b5d2-1fc43b18b7de.bicep create mode 100644 
services/HybridCompute/machines/templates/bicep/HybridMachineDataDiskWriteLatencyAlert_c4226730-ae59-4607-bddc-03b91dad1c4b.bicep create mode 100644 services/HybridCompute/machines/templates/bicep/HybridMachineHeartbeatAlert_7ade7362-3df1-4ad2-a000-78fa7b3d9b27.bicep create mode 100644 services/HybridCompute/machines/templates/bicep/HybridMachineNetworkReadAlert_5e223d13-112e-4f84-82ba-e03a76f6350f.bicep create mode 100644 services/HybridCompute/machines/templates/bicep/HybridMachineNetworkWriteAlert_bb1969d8-eaa2-45b6-bd9f-09348b1ee346.bicep create mode 100644 services/HybridCompute/machines/templates/bicep/HybridMachineOSDiskFreeSpacePercentage_6a96dc94-674f-4a9c-830e-a0a4f7383646.bicep create mode 100644 services/HybridCompute/machines/templates/bicep/HybridMachineOSDiskReadLatencyAlert_4b62cf77-069b-42a3-9608-8cd585a640f1.bicep create mode 100644 services/HybridCompute/machines/templates/bicep/HybridMachineOSDiskWriteLatencyms_50ee79d0-a321-4217-8f5b-b096654ad1ce.bicep create mode 100644 services/HybridCompute/machines/templates/bicep/HybridMachineProcessorUtilizationPercentage_639014b8-0ac6-4ae1-a887-bb3979669287.bicep diff --git a/services/HybridCompute/machines/templates/arm/HybridMachineAvailableMemoryPercentage_67300599-c61e-48d2-b47d-979b2254d494.json b/services/HybridCompute/machines/templates/arm/HybridMachineAvailableMemoryPercentage_67300599-c61e-48d2-b47d-979b2254d494.json new file mode 100644 index 000000000..10fa507cd --- /dev/null +++ b/services/HybridCompute/machines/templates/arm/HybridMachineAvailableMemoryPercentage_67300599-c61e-48d2-b47d-979b2254d494.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log Alert for Hybrid Machine Available Memory 
Percentage", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." 
+ } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": "InsightsMetrics | where _ResourceId has \"Microsoft.HybridCompute/machines\" | where Origin == \"vm.azm.ms\" | where Namespace == \"Memory\" and Name == \"AvailableMB\" | extend TotalMemory = toreal(todynamic(Tags)[\"vm.azm.ms/memorySizeMB\"]) | extend AvailableMemoryPercentage = (toreal(Val) / TotalMemory) * 100.0 | summarize AggregatedValue = avg(AvailableMemoryPercentage) by bin(TimeGenerated,15m), Computer, _ResourceId", + "metadata": { + "description": "Log query (KQL) that the alert rule evaluates to produce the measured value." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "_ResourceId", + "metadata": { + "description": "Name of the resource ID column used to target the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "LessThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "10", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation."
+ } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT15M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "How often the log alert is evaluated, represented in ISO 8601 duration format." + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "Opt out of the customer usage identifier used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry."
+ } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "Computer", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2023-07-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } 
+ ] +} diff --git a/services/HybridCompute/machines/templates/arm/HybridMachineDataDiskFreeSpacePercentageAlert_2030d931-6431-4134-9a03-106ebb83cb2d.json b/services/HybridCompute/machines/templates/arm/HybridMachineDataDiskFreeSpacePercentageAlert_2030d931-6431-4134-9a03-106ebb83cb2d.json new file mode 100644 index 000000000..f810c3f6d --- /dev/null +++ b/services/HybridCompute/machines/templates/arm/HybridMachineDataDiskFreeSpacePercentageAlert_2030d931-6431-4134-9a03-106ebb83cb2d.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log Alert for Hybrid Machine Data Disk Free Space Percentage", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." 
+ } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": "InsightsMetrics | where _ResourceId has \"Microsoft.HybridCompute/machines\" | where Origin == \"vm.azm.ms\" | where Namespace == \"LogicalDisk\"and Name == \"FreeSpacePercentage\" | extend Disk=tostring(todynamic(Tags)[\"vm.azm.ms/mountId\"]) | where Disk !in (\"C:\",\"/\") | summarize AggregatedValue = avg(Val) by bin(TimeGenerated,15m), Computer,_ResourceId, Disk", + "metadata": { + "description": "Log query (KQL) that the alert rule evaluates to produce the measured value." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "_ResourceId", + "metadata": { + "description": "Name of the resource ID column used to target the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "LessThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "10", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation."
+ } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT15M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "How often the log alert is evaluated, represented in ISO 8601 duration format." + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "Opt out of the customer usage identifier used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry."
+ } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "Computer", "operator": "Include", "values": ["*"]}, {"name": "Disk", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2023-07-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + 
"contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/HybridCompute/machines/templates/arm/HybridMachineDataDiskReadLatencyAlert_6aff7331-001f-4ee6-b5d2-1fc43b18b7de.json b/services/HybridCompute/machines/templates/arm/HybridMachineDataDiskReadLatencyAlert_6aff7331-001f-4ee6-b5d2-1fc43b18b7de.json new file mode 100644 index 000000000..b1aea5382 --- /dev/null +++ b/services/HybridCompute/machines/templates/arm/HybridMachineDataDiskReadLatencyAlert_6aff7331-001f-4ee6-b5d2-1fc43b18b7de.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log Alert for Hybrid Machine Data Disk Read Latency", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." 
+ } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": "InsightsMetrics | where _ResourceId has \"Microsoft.HybridCompute/machines\" | where Origin == \"vm.azm.ms\" | where Namespace == \"LogicalDisk\" and Name == \"ReadLatencyMs\" | extend Disk=tostring(todynamic(Tags)[\"vm.azm.ms/mountId\"]) | where Disk !in (\"C:\", \"/\") | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk", + "metadata": { + "description": "Log query (KQL) that the alert rule evaluates to produce the measured value." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "_ResourceId", + "metadata": { + "description": "Name of the resource ID column used to target the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "25", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation."
+ } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT15M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "How often the log alert is evaluated, represented in ISO 8601 duration format." + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "Opt out of the customer usage identifier used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry."
+ } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "Computer", "operator": "Include", "values": ["*"]}, {"name": "Disk", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2023-07-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + 
"contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/HybridCompute/machines/templates/arm/HybridMachineDataDiskWriteLatencyAlert_c4226730-ae59-4607-bddc-03b91dad1c4b.json b/services/HybridCompute/machines/templates/arm/HybridMachineDataDiskWriteLatencyAlert_c4226730-ae59-4607-bddc-03b91dad1c4b.json new file mode 100644 index 000000000..f69c12074 --- /dev/null +++ b/services/HybridCompute/machines/templates/arm/HybridMachineDataDiskWriteLatencyAlert_c4226730-ae59-4607-bddc-03b91dad1c4b.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log Alert for Hybrid Machine Data Disk Write Latency (ms)", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." 
+ } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": "InsightsMetrics | where _ResourceId has \"Microsoft.HybridCompute/machines\" | where Origin == \"vm.azm.ms\" | where Namespace == \"LogicalDisk\" and Name == \"WriteLatencyMs\" | extend Disk=tostring(todynamic(Tags)[\"vm.azm.ms/mountId\"]) | where Disk !in (\"C:\",\"/\") | summarize AggregatedValue = avg(Val) by bin(TimeGenerated,15m), Computer, _ResourceId, Disk", + "metadata": { + "description": "Log query (KQL) that the alert rule evaluates to produce the measured value." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "_ResourceId", + "metadata": { + "description": "Name of the resource ID column used to target the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "25", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation."
+ } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT15M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "How often the log alert is evaluated, represented in ISO 8601 duration format." + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "Opt out of the customer usage identifier used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry."
+ } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "Computer", "operator": "Include", "values": ["*"]}, {"name": "Disk", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2023-07-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + 
"contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/HybridCompute/machines/templates/arm/HybridMachineHeartbeatAlert_7ade7362-3df1-4ad2-a000-78fa7b3d9b27.json b/services/HybridCompute/machines/templates/arm/HybridMachineHeartbeatAlert_7ade7362-3df1-4ad2-a000-78fa7b3d9b27.json new file mode 100644 index 000000000..832278425 --- /dev/null +++ b/services/HybridCompute/machines/templates/arm/HybridMachineHeartbeatAlert_7ade7362-3df1-4ad2-a000-78fa7b3d9b27.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log Alert for Hybrid Machine Heartbeat", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." 
+ } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 1, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": "Heartbeat | where _ResourceId has \"Microsoft.HybridCompute/machines\" | summarize TimeGenerated=max(TimeGenerated) by Computer, _ResourceId | extend Duration = datetime_diff(\"minute\",now(),TimeGenerated) | summarize AggregatedValue = min(Duration) by Computer, bin(TimeGenerated,5m), _ResourceId", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "_ResourceId", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "10", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." 
+ } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower than or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT15M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "How often the log alert is evaluated, represented in ISO 8601 duration format." + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry."
+ } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "Computer", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2023-07-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", +
"contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/HybridCompute/machines/templates/arm/HybridMachineNetworkReadAlert_5e223d13-112e-4f84-82ba-e03a76f6350f.json b/services/HybridCompute/machines/templates/arm/HybridMachineNetworkReadAlert_5e223d13-112e-4f84-82ba-e03a76f6350f.json new file mode 100644 index 000000000..344333d3d --- /dev/null +++ b/services/HybridCompute/machines/templates/arm/HybridMachineNetworkReadAlert_5e223d13-112e-4f84-82ba-e03a76f6350f.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log Alert for Hybrid Machine Network Read (bytes/sec)", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." 
+ } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": "InsightsMetrics | where _ResourceId has \"Microsoft.HybridCompute/machines\" | where Origin == \"vm.azm.ms\" | where Namespace == \"Network\" and Name == \"ReadBytesPerSecond\" | extend NetworkInterface=tostring(todynamic(Tags)[\"vm.azm.ms/networkDeviceId\"]) | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "_ResourceId", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "10000000", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." 
+ } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower than or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT15M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "How often the log alert is evaluated, represented in ISO 8601 duration format." + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry."
+ } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "Computer", "operator": "Include", "values": ["*"]}, {"name": "NetworkInterface", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2023-07-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", +
"contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/HybridCompute/machines/templates/arm/HybridMachineNetworkWriteAlert_bb1969d8-eaa2-45b6-bd9f-09348b1ee346.json b/services/HybridCompute/machines/templates/arm/HybridMachineNetworkWriteAlert_bb1969d8-eaa2-45b6-bd9f-09348b1ee346.json new file mode 100644 index 000000000..740559fd9 --- /dev/null +++ b/services/HybridCompute/machines/templates/arm/HybridMachineNetworkWriteAlert_bb1969d8-eaa2-45b6-bd9f-09348b1ee346.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log Alert for Hybrid Machine Network Write (bytes/sec)", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." 
+ } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": "InsightsMetrics | where _ResourceId has \"Microsoft.HybridCompute/machines\" | where Origin == \"vm.azm.ms\" | where Namespace == \"Network\" and Name == \"WriteBytesPerSecond\" | extend NetworkInterface=tostring(todynamic(Tags)[\"vm.azm.ms/networkDeviceId\"]) | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "_ResourceId", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "10000000", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." 
+ } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower than or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT15M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "How often the log alert is evaluated, represented in ISO 8601 duration format." + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry."
+ } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "Computer", "operator": "Include", "values": ["*"]}, {"name": "NetworkInterface", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2023-07-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", +
"contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/HybridCompute/machines/templates/arm/HybridMachineOSDiskFreeSpacePercentage_6a96dc94-674f-4a9c-830e-a0a4f7383646.json b/services/HybridCompute/machines/templates/arm/HybridMachineOSDiskFreeSpacePercentage_6a96dc94-674f-4a9c-830e-a0a4f7383646.json new file mode 100644 index 000000000..aeecdc44d --- /dev/null +++ b/services/HybridCompute/machines/templates/arm/HybridMachineOSDiskFreeSpacePercentage_6a96dc94-674f-4a9c-830e-a0a4f7383646.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log Alert for Hybrid Machine OS Disk Free Space Percentage", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." 
+ } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": "InsightsMetrics | where _ResourceId has \"Microsoft.HybridCompute/machines\" | where Origin == \"vm.azm.ms\" | where Namespace == \"LogicalDisk\" and Name == \"FreeSpacePercentage\" | extend Disk=tostring(todynamic(Tags)[\"vm.azm.ms/mountId\"]) | where Disk in (\"C:\",\"/\") | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "_ResourceId", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "LessThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "10", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." 
+ } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower than or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT15M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "How often the log alert is evaluated, represented in ISO 8601 duration format." + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry."
+ } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "Computer", "operator": "Include", "values": ["*"]}, {"name": "Disk", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2023-07-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + 
"contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/HybridCompute/machines/templates/arm/HybridMachineOSDiskReadLatencyAlert_4b62cf77-069b-42a3-9608-8cd585a640f1.json b/services/HybridCompute/machines/templates/arm/HybridMachineOSDiskReadLatencyAlert_4b62cf77-069b-42a3-9608-8cd585a640f1.json new file mode 100644 index 000000000..707af5fa6 --- /dev/null +++ b/services/HybridCompute/machines/templates/arm/HybridMachineOSDiskReadLatencyAlert_4b62cf77-069b-42a3-9608-8cd585a640f1.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log Alert for Hybrid Machine OS Disk Read Latency (ms)", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired."
+ } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": "InsightsMetrics | where _ResourceId has \"Microsoft.HybridCompute/machines\" | where Origin == \"vm.azm.ms\" | where Namespace == \"LogicalDisk\" and Name == \"ReadLatencyMs\" | extend Disk=tostring(todynamic(Tags)[\"vm.azm.ms/mountId\"]) | where Disk in (\"C:\",\"/\") | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "_ResourceId", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "25", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." 
+ } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT15M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "P1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry." 
+ } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "Computer", "operator": "Include", "values": ["*"]}, {"name": "Disk", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2023-07-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + 
"contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/HybridCompute/machines/templates/arm/HybridMachineOSDiskWriteLatencyms_50ee79d0-a321-4217-8f5b-b096654ad1ce.json b/services/HybridCompute/machines/templates/arm/HybridMachineOSDiskWriteLatencyms_50ee79d0-a321-4217-8f5b-b096654ad1ce.json new file mode 100644 index 000000000..44c07f6de --- /dev/null +++ b/services/HybridCompute/machines/templates/arm/HybridMachineOSDiskWriteLatencyms_50ee79d0-a321-4217-8f5b-b096654ad1ce.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log Alert for Hybrid Machine OS Disk Write Latency (ms)", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." 
+ } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": "InsightsMetrics | where _ResourceId has \"Microsoft.HybridCompute/machines\" | where Origin == \"vm.azm.ms\" | where Namespace == \"LogicalDisk\" and Name == \"WriteLatencyMs\" | extend Disk=tostring(todynamic(Tags)[\"vm.azm.ms/mountId\"]) | where Disk in (\"C:\",\"/\") | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "_ResourceId", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "25", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." 
+ } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT15M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "P1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry." 
+ } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "Computer", "operator": "Include", "values": ["*"]}, {"name": "Disk", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2023-07-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + 
"contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/HybridCompute/machines/templates/arm/HybridMachineProcessorUtilizationPercentage_639014b8-0ac6-4ae1-a887-bb3979669287.json b/services/HybridCompute/machines/templates/arm/HybridMachineProcessorUtilizationPercentage_639014b8-0ac6-4ae1-a887-bb3979669287.json new file mode 100644 index 000000000..61d79b3eb --- /dev/null +++ b/services/HybridCompute/machines/templates/arm/HybridMachineProcessorUtilizationPercentage_639014b8-0ac6-4ae1-a887-bb3979669287.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log Alert for Hybrid Machine Processor Utilization Percentage", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. 
For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": "InsightsMetrics | where _ResourceId has \"Microsoft.HybridCompute/machines\" | where Origin == \"vm.azm.ms\" | where Namespace == \"Processor\" and Name == \"UtilizationPercentage\" | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "_ResourceId", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." 
+ } + }, + "threshold": { + "type": "string", + "defaultValue": "85", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." + } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT15M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "P1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry." 
+ } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "Computer", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2023-07-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } 
+ ] +} diff --git a/services/HybridCompute/machines/templates/bicep/HybridMachineAvailableMemoryPercentage_67300599-c61e-48d2-b47d-979b2254d494.bicep b/services/HybridCompute/machines/templates/bicep/HybridMachineAvailableMemoryPercentage_67300599-c61e-48d2-b47d-979b2254d494.bicep new file mode 100644 index 000000000..f7cf7b7e9 --- /dev/null +++ b/services/HybridCompute/machines/templates/bicep/HybridMachineAvailableMemoryPercentage_67300599-c61e-48d2-b47d-979b2254d494.bicep @@ -0,0 +1,173 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Log Alert for Hybrid Machine Available Memory Percentage' + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Specifies whether to check linked storage and fail creation if the storage was not found') +param checkWorkspaceAlertsStorageConfigured bool = false + +@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. 
For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz') +@minLength(1) +param resourceId string + +@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' +]) +param muteActionsDuration string + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Specifies whether the alert will automatically resolve') +param autoMitigate bool = true + +@description('Name of the metric used in the comparison to activate the alert.') +@minLength(1) +param query string = 'InsightsMetrics | where _ResourceId has "Microsoft.HybridCompute/machines" | where Origin == "vm.azm.ms" | where Namespace == "Memory" and Name == "AvailableMB" | extend TotalMemory = toreal(todynamic(Tags)["vm.azm.ms/memorySizeMB"]) | extend AvailableMemoryPercentage = (toreal(Val) / TotalMemory) * 100.0 | summarize AggregatedValue = avg(AvailableMemoryPercentage) by bin(TimeGenerated,15m), Computer, _ResourceId' + +@description('Name of the measure column used in the alert evaluation.') +param metricMeasureColumn string = 'AggregatedValue' + +@description('Name of the resource ID column used in the alert targeting the alerts.') +param resourceIdColumn string = '_ResourceId' + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'LessThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 10 + +@description('The number of periods to check in the alert evaluation.') +param numberOfEvaluationPeriods int = 1 + +@description('The number of unhealthy periods to alert on (must be lower or equal to 
numberOfEvaluationPeriods).') +param minFailingPeriodsToAlert int = 1 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT15M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of No enables telemetry. 
The value of Yes disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = { + name: alertName + location: resourceGroup().location + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + severity: alertSeverity + enabled: isEnabled + scopes: [ + resourceId + ] + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + allOf: [ + { + query: query + metricMeasureColumn: metricMeasureColumn + resourceIdColumn: resourceIdColumn + dimensions: [ + { + name: 'Computer' + operator: 'Include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + failingPeriods: { + numberOfEvaluationPeriods: numberOfEvaluationPeriods + minFailingPeriodsToAlert: minFailingPeriodsToAlert + } + } + ] + } + muteActionsDuration: muteActionsDuration + autoMitigate: autoMitigate + checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/HybridCompute/machines/templates/bicep/HybridMachineDataDiskFreeSpacePercentageAlert_2030d931-6431-4134-9a03-106ebb83cb2d.bicep b/services/HybridCompute/machines/templates/bicep/HybridMachineDataDiskFreeSpacePercentageAlert_2030d931-6431-4134-9a03-106ebb83cb2d.bicep new file mode 100644 index 000000000..754ff2f76 --- /dev/null +++ 
b/services/HybridCompute/machines/templates/bicep/HybridMachineDataDiskFreeSpacePercentageAlert_2030d931-6431-4134-9a03-106ebb83cb2d.bicep @@ -0,0 +1,178 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Log Alert for Hybrid Machine Data Disk Free Space Percentage' + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Specifies whether to check linked storage and fail creation if the storage was not found') +param checkWorkspaceAlertsStorageConfigured bool = false + +@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz') +@minLength(1) +param resourceId string + +@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' +]) +param muteActionsDuration string + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Specifies whether the alert will automatically resolve') +param autoMitigate bool = true + +@description('Name of the metric used in the comparison to activate the alert.') +@minLength(1) +param query string = 'InsightsMetrics | where _ResourceId has "Microsoft.HybridCompute/machines" | where Origin == "vm.azm.ms" | where Namespace == "LogicalDisk"and Name == "FreeSpacePercentage" | extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"]) | where Disk !in ("C:","/") | summarize AggregatedValue = avg(Val) by bin(TimeGenerated,15m), Computer,_ResourceId, Disk' + +@description('Name of the measure column used in the alert evaluation.') +param metricMeasureColumn string = 'AggregatedValue' + 
+@description('Name of the resource ID column used in the alert targeting the alerts.') +param resourceIdColumn string = '_ResourceId' + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'LessThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 10 + +@description('The number of periods to check in the alert evaluation.') +param numberOfEvaluationPeriods int = 1 + +@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).') +param minFailingPeriodsToAlert int = 1 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT15M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of No enables telemetry. 
The value of Yes disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = { + name: alertName + location: resourceGroup().location + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + severity: alertSeverity + enabled: isEnabled + scopes: [ + resourceId + ] + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + allOf: [ + { + query: query + metricMeasureColumn: metricMeasureColumn + resourceIdColumn: resourceIdColumn + dimensions: [ + { + name: 'Computer' + operator: 'Include' + values: ['*'] + } + { + name: 'Disk' + operator: 'Include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + failingPeriods: { + numberOfEvaluationPeriods: numberOfEvaluationPeriods + minFailingPeriodsToAlert: minFailingPeriodsToAlert + } + } + ] + } + muteActionsDuration: muteActionsDuration + autoMitigate: autoMitigate + checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/HybridCompute/machines/templates/bicep/HybridMachineDataDiskReadLatencyAlert_6aff7331-001f-4ee6-b5d2-1fc43b18b7de.bicep b/services/HybridCompute/machines/templates/bicep/HybridMachineDataDiskReadLatencyAlert_6aff7331-001f-4ee6-b5d2-1fc43b18b7de.bicep new file mode 100644 index 000000000..ffdcdc664 --- /dev/null +++ 
b/services/HybridCompute/machines/templates/bicep/HybridMachineDataDiskReadLatencyAlert_6aff7331-001f-4ee6-b5d2-1fc43b18b7de.bicep
@@ -0,0 +1,178 @@
+@description('Name of the alert')
+@minLength(1)
+param alertName string
+
+@description('Description of alert')
+param alertDescription string = 'Log Alert for Hybrid Machine Data Disk Read Latency'
+
+@description('Specifies whether the alert is enabled')
+param isEnabled bool = true
+
+@description('Specifies whether to check linked storage and fail creation if the storage was not found')
+param checkWorkspaceAlertsStorageConfigured bool = false
+
+@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz')
+@minLength(1)
+param resourceId string
+
+@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+])
+param muteActionsDuration string
+
+@description('Severity of alert {0,1,2,3,4}')
+@allowed([
+  0
+  1
+  2
+  3
+  4
+])
+param alertSeverity int = 2
+
+@description('Specifies whether the alert will automatically resolve')
+param autoMitigate bool = true
+
+@description('Log Analytics (KQL) query evaluated by the alert rule.')
+@minLength(1)
+param query string = 'InsightsMetrics | where _ResourceId has "Microsoft.HybridCompute/machines" | where Origin == "vm.azm.ms" | where Namespace == "LogicalDisk" and Name == "ReadLatencyMs" | extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"]) | where Disk !in ("C:", "/") | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk'
+
+@description('Name of the measure column used in the alert evaluation.')
+param metricMeasureColumn string = 'AggregatedValue'
+
+@description('Name of the resource ID column used in the alert targeting the alerts.')
+param resourceIdColumn string = '_ResourceId'
+
+@description('Operator comparing the current value with the threshold value.')
+@allowed([
+  'Equals'
+  'GreaterThan'
+  'GreaterThanOrEqual'
+  'LessThan'
+  'LessThanOrEqual'
+])
+param operator string = 'GreaterThan'
+
+@description('The threshold value at which the alert is activated.')
+param threshold int = 25
+
+@description('The number of periods to check in the alert evaluation.')
+param numberOfEvaluationPeriods int = 1
+
+@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).')
+param minFailingPeriodsToAlert int = 1
+
+@description('How the data that is collected should be combined over time.')
+@allowed([
+  'Average'
+  'Minimum'
+  'Maximum'
+  'Total'
+  'Count'
+])
+param timeAggregation string = 'Average'
+
+@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+  'P1D'
+])
+param windowSize string = 'PT15M'
+
+@description('How often the alert query is evaluated, represented in ISO 8601 duration format.')
+@allowed([
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+])
+param evaluationFrequency string = 'PT5M'
+
+@description('The current date and time using the utcNow function. Used for deployment name uniqueness.')
+param currentDateTimeUtcNow string = utcNow()
+
+@description('Opt-out switch for the customer usage attribution (telemetry) deployment. The default value of No enables telemetry. The value of Yes disables telemetry.')
+@allowed([
+  'Yes'
+  'No'
+])
+param telemetryOptOut string = 'No'
+
+resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = {
+  name: alertName
+  location: resourceGroup().location
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    description: alertDescription
+    severity: alertSeverity
+    enabled: isEnabled
+    scopes: [
+      resourceId
+    ]
+    evaluationFrequency: evaluationFrequency
+    windowSize: windowSize
+    criteria: {
+      allOf: [
+        {
+          query: query
+          metricMeasureColumn: metricMeasureColumn
+          resourceIdColumn: resourceIdColumn
+          dimensions: [ // both names are columns in the default query's summarize clause
+            {
+              name: 'Computer'
+              operator: 'Include'
+              values: ['*']
+            }
+            {
+              name: 'Disk'
+              operator: 'Include'
+              values: ['*']
+            }]
+          operator: operator
+          threshold: threshold
+          timeAggregation: timeAggregation
+          failingPeriods: {
+            numberOfEvaluationPeriods: numberOfEvaluationPeriods
+            minFailingPeriodsToAlert: minFailingPeriodsToAlert
+          }
+        }
+      ]
+    }
+    muteActionsDuration: muteActionsDuration
+    autoMitigate: autoMitigate
+    checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured
+  }
+}
+
+var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' // utcNow seed keeps the name unique per deployment
+resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' = if (telemetryOptOut == 'No') { // empty nested deployment, created only when telemetry is not opted out
+  name: ambaTelemetryPidName
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    mode: 'Incremental'
+    template: {
+      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
+      contentVersion: '1.0.0.0'
+      resources: []
+    }
+  }
+}
diff --git a/services/HybridCompute/machines/templates/bicep/HybridMachineDataDiskWriteLatencyAlert_c4226730-ae59-4607-bddc-03b91dad1c4b.bicep b/services/HybridCompute/machines/templates/bicep/HybridMachineDataDiskWriteLatencyAlert_c4226730-ae59-4607-bddc-03b91dad1c4b.bicep
new file mode 100644
index 000000000..47ac9b91e
--- /dev/null
+++ 
b/services/HybridCompute/machines/templates/bicep/HybridMachineDataDiskWriteLatencyAlert_c4226730-ae59-4607-bddc-03b91dad1c4b.bicep
@@ -0,0 +1,178 @@
+@description('Name of the alert')
+@minLength(1)
+param alertName string
+
+@description('Description of alert')
+param alertDescription string = 'Log Alert for Hybrid Machine Data Disk Write Latency (ms)'
+
+@description('Specifies whether the alert is enabled')
+param isEnabled bool = true
+
+@description('Specifies whether to check linked storage and fail creation if the storage was not found')
+param checkWorkspaceAlertsStorageConfigured bool = false
+
+@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz')
+@minLength(1)
+param resourceId string
+
+@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+])
+param muteActionsDuration string
+
+@description('Severity of alert {0,1,2,3,4}')
+@allowed([
+  0
+  1
+  2
+  3
+  4
+])
+param alertSeverity int = 2
+
+@description('Specifies whether the alert will automatically resolve')
+param autoMitigate bool = true
+
+@description('Log Analytics (KQL) query evaluated by the alert rule.')
+@minLength(1)
+param query string = 'InsightsMetrics | where _ResourceId has "Microsoft.HybridCompute/machines" | where Origin == "vm.azm.ms" | where Namespace == "LogicalDisk" and Name == "WriteLatencyMs" | extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"]) | where Disk !in ("C:","/") | summarize AggregatedValue = avg(Val) by bin(TimeGenerated,15m), Computer, _ResourceId, Disk'
+
+@description('Name of the measure column used in the alert evaluation.')
+param metricMeasureColumn string = 'AggregatedValue'
+
+@description('Name of the resource ID column used in the alert targeting the alerts.')
+param resourceIdColumn string = '_ResourceId'
+
+@description('Operator comparing the current value with the threshold value.')
+@allowed([
+  'Equals'
+  'GreaterThan'
+  'GreaterThanOrEqual'
+  'LessThan'
+  'LessThanOrEqual'
+])
+param operator string = 'GreaterThan'
+
+@description('The threshold value at which the alert is activated.')
+param threshold int = 25
+
+@description('The number of periods to check in the alert evaluation.')
+param numberOfEvaluationPeriods int = 1
+
+@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).')
+param minFailingPeriodsToAlert int = 1
+
+@description('How the data that is collected should be combined over time.')
+@allowed([
+  'Average'
+  'Minimum'
+  'Maximum'
+  'Total'
+  'Count'
+])
+param timeAggregation string = 'Average'
+
+@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+  'P1D'
+])
+param windowSize string = 'PT15M'
+
+@description('How often the alert query is evaluated, represented in ISO 8601 duration format.')
+@allowed([
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+])
+param evaluationFrequency string = 'PT5M'
+
+@description('The current date and time using the utcNow function. Used for deployment name uniqueness.')
+param currentDateTimeUtcNow string = utcNow()
+
+@description('Opt-out switch for the customer usage attribution (telemetry) deployment. The default value of No enables telemetry. The value of Yes disables telemetry.')
+@allowed([
+  'Yes'
+  'No'
+])
+param telemetryOptOut string = 'No'
+
+resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = {
+  name: alertName
+  location: resourceGroup().location
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    description: alertDescription
+    severity: alertSeverity
+    enabled: isEnabled
+    scopes: [
+      resourceId
+    ]
+    evaluationFrequency: evaluationFrequency
+    windowSize: windowSize
+    criteria: {
+      allOf: [
+        {
+          query: query
+          metricMeasureColumn: metricMeasureColumn
+          resourceIdColumn: resourceIdColumn
+          dimensions: [ // both names are columns in the default query's summarize clause
+            {
+              name: 'Computer'
+              operator: 'Include'
+              values: ['*']
+            }
+            {
+              name: 'Disk'
+              operator: 'Include'
+              values: ['*']
+            }]
+          operator: operator
+          threshold: threshold
+          timeAggregation: timeAggregation
+          failingPeriods: {
+            numberOfEvaluationPeriods: numberOfEvaluationPeriods
+            minFailingPeriodsToAlert: minFailingPeriodsToAlert
+          }
+        }
+      ]
+    }
+    muteActionsDuration: muteActionsDuration
+    autoMitigate: autoMitigate
+    checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured
+  }
+}
+
+var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' // utcNow seed keeps the name unique per deployment
+resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' = if (telemetryOptOut == 'No') { // empty nested deployment, created only when telemetry is not opted out
+  name: ambaTelemetryPidName
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    mode: 'Incremental'
+    template: {
+      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
+      contentVersion: '1.0.0.0'
+      resources: []
+    }
+  }
+}
diff --git a/services/HybridCompute/machines/templates/bicep/HybridMachineHeartbeatAlert_7ade7362-3df1-4ad2-a000-78fa7b3d9b27.bicep b/services/HybridCompute/machines/templates/bicep/HybridMachineHeartbeatAlert_7ade7362-3df1-4ad2-a000-78fa7b3d9b27.bicep
new file mode 100644
index 000000000..29e257ccb
--- /dev/null
+++ 
b/services/HybridCompute/machines/templates/bicep/HybridMachineHeartbeatAlert_7ade7362-3df1-4ad2-a000-78fa7b3d9b27.bicep
@@ -0,0 +1,173 @@
+@description('Name of the alert')
+@minLength(1)
+param alertName string
+
+@description('Description of alert')
+param alertDescription string = 'Log Alert for Hybrid Machine Heartbeat'
+
+@description('Specifies whether the alert is enabled')
+param isEnabled bool = true
+
+@description('Specifies whether to check linked storage and fail creation if the storage was not found')
+param checkWorkspaceAlertsStorageConfigured bool = false
+
+@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz')
+@minLength(1)
+param resourceId string
+
+@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+])
+param muteActionsDuration string
+
+@description('Severity of alert {0,1,2,3,4}')
+@allowed([
+  0
+  1
+  2
+  3
+  4
+])
+param alertSeverity int = 1
+
+@description('Specifies whether the alert will automatically resolve')
+param autoMitigate bool = true
+
+@description('Log Analytics (KQL) query evaluated by the alert rule.')
+@minLength(1)
+param query string = 'Heartbeat | where _ResourceId has "Microsoft.HybridCompute/machines" | summarize TimeGenerated=max(TimeGenerated) by Computer, _ResourceId | extend Duration = datetime_diff("minute",now(),TimeGenerated) | summarize AggregatedValue = min(Duration) by Computer, bin(TimeGenerated,5m), _ResourceId'
+
+@description('Name of the measure column used in the alert evaluation.')
+param metricMeasureColumn string = 'AggregatedValue'
+
+@description('Name of the resource ID column used in the alert targeting the alerts.')
+param resourceIdColumn string = '_ResourceId'
+
+@description('Operator comparing the current value with the threshold value.')
+@allowed([
+  'Equals'
+  'GreaterThan'
+  'GreaterThanOrEqual'
+  'LessThan'
+  'LessThanOrEqual'
+])
+param operator string = 'GreaterThan'
+
+@description('The threshold value at which the alert is activated.')
+param threshold int = 10
+
+@description('The number of periods to check in the alert evaluation.')
+param numberOfEvaluationPeriods int = 1
+
+@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).')
+param minFailingPeriodsToAlert int = 1
+
+@description('How the data that is collected should be combined over time.')
+@allowed([
+  'Average'
+  'Minimum'
+  'Maximum'
+  'Total'
+  'Count'
+])
+param timeAggregation string = 'Average'
+
+@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+  'P1D'
+])
+param windowSize string = 'PT15M'
+
+@description('How often the alert query is evaluated, represented in ISO 8601 duration format.')
+@allowed([
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+])
+param evaluationFrequency string = 'PT5M'
+
+@description('The current date and time using the utcNow function. Used for deployment name uniqueness.')
+param currentDateTimeUtcNow string = utcNow()
+
+@description('Opt-out switch for the customer usage attribution (telemetry) deployment. The default value of No enables telemetry. The value of Yes disables telemetry.')
+@allowed([
+  'Yes'
+  'No'
+])
+param telemetryOptOut string = 'No'
+
+resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = {
+  name: alertName
+  location: resourceGroup().location
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    description: alertDescription
+    severity: alertSeverity
+    enabled: isEnabled
+    scopes: [
+      resourceId
+    ]
+    evaluationFrequency: evaluationFrequency
+    windowSize: windowSize
+    criteria: {
+      allOf: [
+        {
+          query: query
+          metricMeasureColumn: metricMeasureColumn
+          resourceIdColumn: resourceIdColumn
+          dimensions: [ // the default Heartbeat query has no Disk column, so Computer is the only dimension
+            {
+              name: 'Computer'
+              operator: 'Include'
+              values: ['*']
+            }]
+          operator: operator
+          threshold: threshold
+          timeAggregation: timeAggregation
+          failingPeriods: {
+            numberOfEvaluationPeriods: numberOfEvaluationPeriods
+            minFailingPeriodsToAlert: minFailingPeriodsToAlert
+          }
+        }
+      ]
+    }
+    muteActionsDuration: muteActionsDuration
+    autoMitigate: autoMitigate
+    checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured
+  }
+}
+
+var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' // utcNow seed keeps the name unique per deployment
+resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' = if (telemetryOptOut == 'No') { // empty nested deployment, created only when telemetry is not opted out
+  name: ambaTelemetryPidName
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    mode: 'Incremental'
+    template: {
+      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
+      contentVersion: '1.0.0.0'
+      resources: []
+    }
+  }
+}
diff --git a/services/HybridCompute/machines/templates/bicep/HybridMachineNetworkReadAlert_5e223d13-112e-4f84-82ba-e03a76f6350f.bicep b/services/HybridCompute/machines/templates/bicep/HybridMachineNetworkReadAlert_5e223d13-112e-4f84-82ba-e03a76f6350f.bicep
new file mode 100644
index 000000000..5bb4c98f5
--- /dev/null
+++ 
b/services/HybridCompute/machines/templates/bicep/HybridMachineNetworkReadAlert_5e223d13-112e-4f84-82ba-e03a76f6350f.bicep
@@ -0,0 +1,178 @@
+@description('Name of the alert')
+@minLength(1)
+param alertName string
+
+@description('Description of alert')
+param alertDescription string = 'Log Alert for Hybrid Machine Network Read (bytes/sec)'
+
+@description('Specifies whether the alert is enabled')
+param isEnabled bool = true
+
+@description('Specifies whether to check linked storage and fail creation if the storage was not found')
+param checkWorkspaceAlertsStorageConfigured bool = false
+
+@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz')
+@minLength(1)
+param resourceId string
+
+@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+])
+param muteActionsDuration string
+
+@description('Severity of alert {0,1,2,3,4}')
+@allowed([
+  0
+  1
+  2
+  3
+  4
+])
+param alertSeverity int = 2
+
+@description('Specifies whether the alert will automatically resolve')
+param autoMitigate bool = true
+
+@description('Log Analytics (KQL) query evaluated by the alert rule.')
+@minLength(1)
+param query string = 'InsightsMetrics | where _ResourceId has "Microsoft.HybridCompute/machines" | where Origin == "vm.azm.ms" | where Namespace == "Network" and Name == "ReadBytesPerSecond" | extend NetworkInterface=tostring(todynamic(Tags)["vm.azm.ms/networkDeviceId"]) | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface'
+
+@description('Name of the measure column used in the alert evaluation.')
+param metricMeasureColumn string = 'AggregatedValue'
+
+@description('Name of the resource ID column used in the alert targeting the alerts.')
+param resourceIdColumn string = '_ResourceId'
+
+@description('Operator comparing the current value with the threshold value.')
+@allowed([
+  'Equals'
+  'GreaterThan'
+  'GreaterThanOrEqual'
+  'LessThan'
+  'LessThanOrEqual'
+])
+param operator string = 'GreaterThan'
+
+@description('The threshold value at which the alert is activated.')
+param threshold int = 10000000
+
+@description('The number of periods to check in the alert evaluation.')
+param numberOfEvaluationPeriods int = 1
+
+@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).')
+param minFailingPeriodsToAlert int = 1
+
+@description('How the data that is collected should be combined over time.')
+@allowed([
+  'Average'
+  'Minimum'
+  'Maximum'
+  'Total'
+  'Count'
+])
+param timeAggregation string = 'Average'
+
+@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+  'P1D'
+])
+param windowSize string = 'PT15M'
+
+@description('How often the alert query is evaluated, represented in ISO 8601 duration format.')
+@allowed([
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+])
+param evaluationFrequency string = 'PT5M'
+
+@description('The current date and time using the utcNow function. Used for deployment name uniqueness.')
+param currentDateTimeUtcNow string = utcNow()
+
+@description('Opt-out switch for the customer usage attribution (telemetry) deployment. The default value of No enables telemetry. The value of Yes disables telemetry.')
+@allowed([
+  'Yes'
+  'No'
+])
+param telemetryOptOut string = 'No'
+
+resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = {
+  name: alertName
+  location: resourceGroup().location
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    description: alertDescription
+    severity: alertSeverity
+    enabled: isEnabled
+    scopes: [
+      resourceId
+    ]
+    evaluationFrequency: evaluationFrequency
+    windowSize: windowSize
+    criteria: {
+      allOf: [
+        {
+          query: query
+          metricMeasureColumn: metricMeasureColumn
+          resourceIdColumn: resourceIdColumn
+          dimensions: [ // names match the columns in the default query's summarize clause
+            {
+              name: 'Computer'
+              operator: 'Include'
+              values: ['*']
+            }
+            {
+              name: 'NetworkInterface'
+              operator: 'Include'
+              values: ['*']
+            }]
+          operator: operator
+          threshold: threshold
+          timeAggregation: timeAggregation
+          failingPeriods: {
+            numberOfEvaluationPeriods: numberOfEvaluationPeriods
+            minFailingPeriodsToAlert: minFailingPeriodsToAlert
+          }
+        }
+      ]
+    }
+    muteActionsDuration: muteActionsDuration
+    autoMitigate: autoMitigate
+    checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured
+  }
+}
+
+var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' // utcNow seed keeps the name unique per deployment
+resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' = if (telemetryOptOut == 'No') { // empty nested deployment, created only when telemetry is not opted out
+  name: ambaTelemetryPidName
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    mode: 'Incremental'
+    template: {
+      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
+      contentVersion: '1.0.0.0'
+      resources: []
+    }
+  }
+}
diff --git a/services/HybridCompute/machines/templates/bicep/HybridMachineNetworkWriteAlert_bb1969d8-eaa2-45b6-bd9f-09348b1ee346.bicep b/services/HybridCompute/machines/templates/bicep/HybridMachineNetworkWriteAlert_bb1969d8-eaa2-45b6-bd9f-09348b1ee346.bicep
new file mode 100644
index 000000000..8e0d4f83a
--- /dev/null
+++ 
b/services/HybridCompute/machines/templates/bicep/HybridMachineNetworkWriteAlert_bb1969d8-eaa2-45b6-bd9f-09348b1ee346.bicep
@@ -0,0 +1,178 @@
+@description('Name of the alert')
+@minLength(1)
+param alertName string
+
+@description('Description of alert')
+param alertDescription string = 'Log Alert for Hybrid Machine Network Write (bytes/sec)'
+
+@description('Specifies whether the alert is enabled')
+param isEnabled bool = true
+
+@description('Specifies whether to check linked storage and fail creation if the storage was not found')
+param checkWorkspaceAlertsStorageConfigured bool = false
+
+@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz')
+@minLength(1)
+param resourceId string
+
+@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+])
+param muteActionsDuration string
+
+@description('Severity of alert {0,1,2,3,4}')
+@allowed([
+  0
+  1
+  2
+  3
+  4
+])
+param alertSeverity int = 2
+
+@description('Specifies whether the alert will automatically resolve')
+param autoMitigate bool = true
+
+@description('Log Analytics (KQL) query evaluated by the alert rule.')
+@minLength(1)
+param query string = 'InsightsMetrics | where _ResourceId has "Microsoft.HybridCompute/machines" | where Origin == "vm.azm.ms" | where Namespace == "Network" and Name == "WriteBytesPerSecond" | extend NetworkInterface=tostring(todynamic(Tags)["vm.azm.ms/networkDeviceId"]) | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface'
+
+@description('Name of the measure column used in the alert evaluation.')
+param metricMeasureColumn string = 'AggregatedValue'
+
+@description('Name of the resource ID column used in the alert targeting the alerts.')
+param resourceIdColumn string = '_ResourceId'
+
+@description('Operator comparing the current value with the threshold value.')
+@allowed([
+  'Equals'
+  'GreaterThan'
+  'GreaterThanOrEqual'
+  'LessThan'
+  'LessThanOrEqual'
+])
+param operator string = 'GreaterThan'
+
+@description('The threshold value at which the alert is activated.')
+param threshold int = 10000000
+
+@description('The number of periods to check in the alert evaluation.')
+param numberOfEvaluationPeriods int = 1
+
+@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).')
+param minFailingPeriodsToAlert int = 1
+
+@description('How the data that is collected should be combined over time.')
+@allowed([
+  'Average'
+  'Minimum'
+  'Maximum'
+  'Total'
+  'Count'
+])
+param timeAggregation string = 'Average'
+
+@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+  'P1D'
+])
+param windowSize string = 'PT15M'
+
+@description('How often the alert query is evaluated, represented in ISO 8601 duration format.')
+@allowed([
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+])
+param evaluationFrequency string = 'PT5M'
+
+@description('The current date and time using the utcNow function. Used for deployment name uniqueness.')
+param currentDateTimeUtcNow string = utcNow()
+
+@description('Opt-out switch for the customer usage attribution (telemetry) deployment. The default value of No enables telemetry. The value of Yes disables telemetry.')
+@allowed([
+  'Yes'
+  'No'
+])
+param telemetryOptOut string = 'No'
+
+resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = {
+  name: alertName
+  location: resourceGroup().location
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    description: alertDescription
+    severity: alertSeverity
+    enabled: isEnabled
+    scopes: [
+      resourceId
+    ]
+    evaluationFrequency: evaluationFrequency
+    windowSize: windowSize
+    criteria: {
+      allOf: [
+        {
+          query: query
+          metricMeasureColumn: metricMeasureColumn
+          resourceIdColumn: resourceIdColumn
+          dimensions: [ // names match the columns in the default query's summarize clause
+            {
+              name: 'Computer'
+              operator: 'Include'
+              values: ['*']
+            }
+            {
+              name: 'NetworkInterface'
+              operator: 'Include'
+              values: ['*']
+            }]
+          operator: operator
+          threshold: threshold
+          timeAggregation: timeAggregation
+          failingPeriods: {
+            numberOfEvaluationPeriods: numberOfEvaluationPeriods
+            minFailingPeriodsToAlert: minFailingPeriodsToAlert
+          }
+        }
+      ]
+    }
+    muteActionsDuration: muteActionsDuration
+    autoMitigate: autoMitigate
+    checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured
+  }
+}
+
+var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' // utcNow seed keeps the name unique per deployment
+resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' = if (telemetryOptOut == 'No') { // empty nested deployment, created only when telemetry is not opted out
+  name: ambaTelemetryPidName
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    mode: 'Incremental'
+    template: {
+      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
+      contentVersion: '1.0.0.0'
+      resources: []
+    }
+  }
+}
diff --git a/services/HybridCompute/machines/templates/bicep/HybridMachineOSDiskFreeSpacePercentage_6a96dc94-674f-4a9c-830e-a0a4f7383646.bicep b/services/HybridCompute/machines/templates/bicep/HybridMachineOSDiskFreeSpacePercentage_6a96dc94-674f-4a9c-830e-a0a4f7383646.bicep
new file mode 100644
index 000000000..348fb9ab6
--- /dev/null
+++ 
b/services/HybridCompute/machines/templates/bicep/HybridMachineOSDiskFreeSpacePercentage_6a96dc94-674f-4a9c-830e-a0a4f7383646.bicep
@@ -0,0 +1,178 @@
+@description('Name of the alert')
+@minLength(1)
+param alertName string
+
+@description('Description of alert')
+param alertDescription string = 'Log Alert for Hybrid Machine OS Disk Free Space Percentage'
+
+@description('Specifies whether the alert is enabled')
+param isEnabled bool = true
+
+@description('Specifies whether to check linked storage and fail creation if the storage was not found')
+param checkWorkspaceAlertsStorageConfigured bool = false
+
+@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz')
+@minLength(1)
+param resourceId string
+
+@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+])
+param muteActionsDuration string
+
+@description('Severity of alert {0,1,2,3,4}')
+@allowed([
+  0
+  1
+  2
+  3
+  4
+])
+param alertSeverity int = 2
+
+@description('Specifies whether the alert will automatically resolve')
+param autoMitigate bool = true
+
+@description('Log Analytics (KQL) query evaluated by the alert rule.')
+@minLength(1)
+param query string = 'InsightsMetrics | where _ResourceId has "Microsoft.HybridCompute/machines" | where Origin == "vm.azm.ms" | where Namespace == "LogicalDisk" and Name == "FreeSpacePercentage" | extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"]) | where Disk in ("C:","/") | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk'
+
+@description('Name of the measure column used in the alert evaluation.')
+param metricMeasureColumn string = 'AggregatedValue'
+
+@description('Name of the resource ID column used in the alert targeting the alerts.')
+param resourceIdColumn string = '_ResourceId'
+
+@description('Operator comparing the current value with the threshold value.')
+@allowed([
+  'Equals'
+  'GreaterThan'
+  'GreaterThanOrEqual'
+  'LessThan'
+  'LessThanOrEqual'
+])
+param operator string = 'LessThan'
+
+@description('The threshold value at which the alert is activated.')
+param threshold int = 10
+
+@description('The number of periods to check in the alert evaluation.')
+param numberOfEvaluationPeriods int = 1
+
+@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).')
+param minFailingPeriodsToAlert int = 1
+
+@description('How the data that is collected should be combined over time.')
+@allowed([
+  'Average'
+  'Minimum'
+  'Maximum'
+  'Total'
+  'Count'
+])
+param timeAggregation string = 'Average'
+
+@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+  'P1D'
+])
+param windowSize string = 'PT15M'
+
+@description('How often the alert query is evaluated, represented in ISO 8601 duration format.')
+@allowed([
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+])
+param evaluationFrequency string = 'PT5M'
+
+@description('The current date and time using the utcNow function. Used for deployment name uniqueness.')
+param currentDateTimeUtcNow string = utcNow()
+
+@description('Opt-out switch for the customer usage attribution (telemetry) deployment. The default value of No enables telemetry. The value of Yes disables telemetry.')
+@allowed([
+  'Yes'
+  'No'
+])
+param telemetryOptOut string = 'No'
+
+resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = {
+  name: alertName
+  location: resourceGroup().location
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    description: alertDescription
+    severity: alertSeverity
+    enabled: isEnabled
+    scopes: [
+      resourceId
+    ]
+    evaluationFrequency: evaluationFrequency
+    windowSize: windowSize
+    criteria: {
+      allOf: [
+        {
+          query: query
+          metricMeasureColumn: metricMeasureColumn
+          resourceIdColumn: resourceIdColumn
+          dimensions: [ // both names are columns in the default query's summarize clause
+            {
+              name: 'Computer'
+              operator: 'Include'
+              values: ['*']
+            }
+            {
+              name: 'Disk'
+              operator: 'Include'
+              values: ['*']
+            }]
+          operator: operator
+          threshold: threshold
+          timeAggregation: timeAggregation
+          failingPeriods: {
+            numberOfEvaluationPeriods: numberOfEvaluationPeriods
+            minFailingPeriodsToAlert: minFailingPeriodsToAlert
+          }
+        }
+      ]
+    }
+    muteActionsDuration: muteActionsDuration
+    autoMitigate: autoMitigate
+    checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured
+  }
+}
+
+var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' // utcNow seed keeps the name unique per deployment
+resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' = if (telemetryOptOut == 'No') { // empty nested deployment, created only when telemetry is not opted out
+  name: ambaTelemetryPidName
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    mode: 'Incremental'
+    template: {
+      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
+      contentVersion: '1.0.0.0'
+      resources: []
+    }
+  }
+}
diff --git a/services/HybridCompute/machines/templates/bicep/HybridMachineOSDiskReadLatencyAlert_4b62cf77-069b-42a3-9608-8cd585a640f1.bicep b/services/HybridCompute/machines/templates/bicep/HybridMachineOSDiskReadLatencyAlert_4b62cf77-069b-42a3-9608-8cd585a640f1.bicep
new file mode 100644
index 000000000..310da0d92
--- /dev/null
+++ 
b/services/HybridCompute/machines/templates/bicep/HybridMachineOSDiskReadLatencyAlert_4b62cf77-069b-42a3-9608-8cd585a640f1.bicep
@@ -0,0 +1,178 @@
+@description('Name of the alert')
+@minLength(1)
+param alertName string
+
+@description('Description of alert')
+param alertDescription string = 'Log Alert for Hybrid Machine OS Disk Read Latency (ms)'
+
+@description('Specifies whether the alert is enabled')
+param isEnabled bool = true
+
+@description('Specifies whether to check linked storage and fail creation if the storage was not found')
+param checkWorkspaceAlertsStorageConfigured bool = false
+
+@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz')
+@minLength(1)
+param resourceId string
+
+@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+])
+param muteActionsDuration string
+
+@description('Severity of alert {0,1,2,3,4}')
+@allowed([
+  0
+  1
+  2
+  3
+  4
+])
+param alertSeverity int = 2
+
+@description('Specifies whether the alert will automatically resolve')
+param autoMitigate bool = true
+
+@description('Log (KQL) query evaluated by the alert rule. The default returns the measure and resource ID columns named by metricMeasureColumn and resourceIdColumn.')
+@minLength(1)
+param query string = 'InsightsMetrics | where _ResourceId has "Microsoft.HybridCompute/machines" | where Origin == "vm.azm.ms" | where Namespace == "LogicalDisk" and Name == "ReadLatencyMs" | extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"]) | where Disk in ("C:","/") | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk'
+
+@description('Name of the measure column used in the alert evaluation.')
+param metricMeasureColumn string = 'AggregatedValue'
+
+@description('Name of the column in the query results that holds the resource ID used to target the alert.')
+param resourceIdColumn string = '_ResourceId'
+
+@description('Operator comparing the current value with the threshold value.')
+@allowed([
+  'Equals'
+  'GreaterThan'
+  'GreaterThanOrEqual'
+  'LessThan'
+  'LessThanOrEqual'
+])
+param operator string = 'GreaterThan'
+
+@description('The threshold value at which the alert is activated.')
+param threshold int = 25
+
+@description('The number of periods to check in the alert evaluation.')
+param numberOfEvaluationPeriods int = 1
+
+@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).')
+param minFailingPeriodsToAlert int = 1
+
+@description('How the data that is collected should be combined over time.')
+@allowed([
+  'Average'
+  'Minimum'
+  'Maximum'
+  'Total'
+  'Count'
+])
+param timeAggregation string = 'Average'
+
+@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+  'P1D'
+])
+param windowSize string = 'PT15M'
+
+@description('How often the log alert is evaluated, represented in ISO 8601 duration format.')
+@allowed([
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+])
+param evaluationFrequency string = 'PT5M'
+
+@description('The current date and time using the utcNow function. Used for deployment name uniqueness')
+param currentDateTimeUtcNow string = utcNow()
+
+@description('Opt-out switch for the customer usage identifier deployment used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry.')
+@allowed([
+  'Yes'
+  'No'
+])
+param telemetryOptOut string = 'No'
+
+resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = {
+  name: alertName
+  location: resourceGroup().location
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    description: alertDescription
+    severity: alertSeverity
+    enabled: isEnabled
+    scopes: [
+      resourceId
+    ]
+    evaluationFrequency: evaluationFrequency
+    windowSize: windowSize
+    criteria: {
+      allOf: [
+        {
+          query: query
+          metricMeasureColumn: metricMeasureColumn
+          resourceIdColumn: resourceIdColumn
+          dimensions: [
+            {
+              name: 'Computer'
+              operator: 'Include'
+              values: ['*']
+            }
+            {
+              name: 'Disk'
+              operator: 'Include'
+              values: ['*']
+            }]
+          operator: operator
+          threshold: threshold
+          timeAggregation: timeAggregation
+          failingPeriods: {
+            numberOfEvaluationPeriods: numberOfEvaluationPeriods
+            minFailingPeriodsToAlert: minFailingPeriodsToAlert
+          }
+        }
+      ]
+    }
+    muteActionsDuration: muteActionsDuration
+    autoMitigate: autoMitigate
+    checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured
+  }
+}
+
+var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
+resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' = if (telemetryOptOut == 'No') {
+  name: ambaTelemetryPidName
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    mode: 'Incremental'
+    template: {
+      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
+      contentVersion: '1.0.0.0'
+      resources: []
+    }
+  }
+}
diff --git a/services/HybridCompute/machines/templates/bicep/HybridMachineOSDiskWriteLatencyms_50ee79d0-a321-4217-8f5b-b096654ad1ce.bicep b/services/HybridCompute/machines/templates/bicep/HybridMachineOSDiskWriteLatencyms_50ee79d0-a321-4217-8f5b-b096654ad1ce.bicep
new file mode 100644
index 000000000..98aa37c3f
--- /dev/null
+++ 
b/services/HybridCompute/machines/templates/bicep/HybridMachineOSDiskWriteLatencyms_50ee79d0-a321-4217-8f5b-b096654ad1ce.bicep
@@ -0,0 +1,178 @@
+@description('Name of the alert')
+@minLength(1)
+param alertName string
+
+@description('Description of alert')
+param alertDescription string = 'Log Alert for Hybrid Machine OS Disk Write Latency (ms)'
+
+@description('Specifies whether the alert is enabled')
+param isEnabled bool = true
+
+@description('Specifies whether to check linked storage and fail creation if the storage was not found')
+param checkWorkspaceAlertsStorageConfigured bool = false
+
+@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz')
+@minLength(1)
+param resourceId string
+
+@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+])
+param muteActionsDuration string
+
+@description('Severity of alert {0,1,2,3,4}')
+@allowed([
+  0
+  1
+  2
+  3
+  4
+])
+param alertSeverity int = 2
+
+@description('Specifies whether the alert will automatically resolve')
+param autoMitigate bool = true
+
+@description('Log (KQL) query evaluated by the alert rule. The default returns the measure and resource ID columns named by metricMeasureColumn and resourceIdColumn.')
+@minLength(1)
+param query string = 'InsightsMetrics | where _ResourceId has "Microsoft.HybridCompute/machines" | where Origin == "vm.azm.ms" | where Namespace == "LogicalDisk" and Name == "WriteLatencyMs" | extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"]) | where Disk in ("C:","/") | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk'
+
+@description('Name of the measure column used in the alert evaluation.')
+param metricMeasureColumn string = 'AggregatedValue'
+
+@description('Name of the column in the query results that holds the resource ID used to target the alert.')
+param resourceIdColumn string = '_ResourceId'
+
+@description('Operator comparing the current value with the threshold value.')
+@allowed([
+  'Equals'
+  'GreaterThan'
+  'GreaterThanOrEqual'
+  'LessThan'
+  'LessThanOrEqual'
+])
+param operator string = 'GreaterThan'
+
+@description('The threshold value at which the alert is activated.')
+param threshold int = 25
+
+@description('The number of periods to check in the alert evaluation.')
+param numberOfEvaluationPeriods int = 1
+
+@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).')
+param minFailingPeriodsToAlert int = 1
+
+@description('How the data that is collected should be combined over time.')
+@allowed([
+  'Average'
+  'Minimum'
+  'Maximum'
+  'Total'
+  'Count'
+])
+param timeAggregation string = 'Average'
+
+@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+  'P1D'
+])
+param windowSize string = 'PT15M'
+
+@description('How often the log alert is evaluated, represented in ISO 8601 duration format.')
+@allowed([
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+])
+param evaluationFrequency string = 'PT5M'
+
+@description('The current date and time using the utcNow function. Used for deployment name uniqueness')
+param currentDateTimeUtcNow string = utcNow()
+
+@description('Opt-out switch for the customer usage identifier deployment used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry.')
+@allowed([
+  'Yes'
+  'No'
+])
+param telemetryOptOut string = 'No'
+
+resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = {
+  name: alertName
+  location: resourceGroup().location
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    description: alertDescription
+    severity: alertSeverity
+    enabled: isEnabled
+    scopes: [
+      resourceId
+    ]
+    evaluationFrequency: evaluationFrequency
+    windowSize: windowSize
+    criteria: {
+      allOf: [
+        {
+          query: query
+          metricMeasureColumn: metricMeasureColumn
+          resourceIdColumn: resourceIdColumn
+          dimensions: [
+            {
+              name: 'Computer'
+              operator: 'Include'
+              values: ['*']
+            }
+            {
+              name: 'Disk'
+              operator: 'Include'
+              values: ['*']
+            }]
+          operator: operator
+          threshold: threshold
+          timeAggregation: timeAggregation
+          failingPeriods: {
+            numberOfEvaluationPeriods: numberOfEvaluationPeriods
+            minFailingPeriodsToAlert: minFailingPeriodsToAlert
+          }
+        }
+      ]
+    }
+    muteActionsDuration: muteActionsDuration
+    autoMitigate: autoMitigate
+    checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured
+  }
+}
+
+var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
+resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' = if (telemetryOptOut == 'No') {
+  name: ambaTelemetryPidName
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    mode: 'Incremental'
+    template: {
+      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
+      contentVersion: '1.0.0.0'
+      resources: []
+    }
+  }
+}
diff --git a/services/HybridCompute/machines/templates/bicep/HybridMachineProcessorUtilizationPercentage_639014b8-0ac6-4ae1-a887-bb3979669287.bicep b/services/HybridCompute/machines/templates/bicep/HybridMachineProcessorUtilizationPercentage_639014b8-0ac6-4ae1-a887-bb3979669287.bicep
new file mode 100644
index 000000000..9633a1595
--- /dev/null
+++ 
b/services/HybridCompute/machines/templates/bicep/HybridMachineProcessorUtilizationPercentage_639014b8-0ac6-4ae1-a887-bb3979669287.bicep
@@ -0,0 +1,173 @@
+@description('Name of the alert')
+@minLength(1)
+param alertName string
+
+@description('Description of alert')
+param alertDescription string = 'Log Alert for Hybrid Machine Processor Utilization Percentage'
+
+@description('Specifies whether the alert is enabled')
+param isEnabled bool = true
+
+@description('Specifies whether to check linked storage and fail creation if the storage was not found')
+param checkWorkspaceAlertsStorageConfigured bool = false
+
+@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz')
+@minLength(1)
+param resourceId string
+
+@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+])
+param muteActionsDuration string
+
+@description('Severity of alert {0,1,2,3,4}')
+@allowed([
+  0
+  1
+  2
+  3
+  4
+])
+param alertSeverity int = 2
+
+@description('Specifies whether the alert will automatically resolve')
+param autoMitigate bool = true
+
+@description('Log (KQL) query evaluated by the alert rule. The default returns the measure and resource ID columns named by metricMeasureColumn and resourceIdColumn.')
+@minLength(1)
+param query string = 'InsightsMetrics | where _ResourceId has "Microsoft.HybridCompute/machines" | where Origin == "vm.azm.ms" | where Namespace == "Processor" and Name == "UtilizationPercentage" | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId'
+
+@description('Name of the measure column used in the alert evaluation.')
+param metricMeasureColumn string = 'AggregatedValue'
+
+@description('Name of the column in the query results that holds the resource ID used to target the alert.')
+param resourceIdColumn string = '_ResourceId'
+
+@description('Operator comparing the current value with the threshold value.')
+@allowed([
+  'Equals'
+  'GreaterThan'
+  'GreaterThanOrEqual'
+  'LessThan'
+  'LessThanOrEqual'
+])
+param operator string = 'GreaterThan'
+
+@description('The threshold value at which the alert is activated.')
+param threshold int = 85
+
+@description('The number of periods to check in the alert evaluation.')
+param numberOfEvaluationPeriods int = 1
+
+@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).')
+param minFailingPeriodsToAlert int = 1
+
+@description('How the data that is collected should be combined over time.')
+@allowed([
+  'Average'
+  'Minimum'
+  'Maximum'
+  'Total'
+  'Count'
+])
+param timeAggregation string = 'Average'
+
+@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.')
+@allowed([
+  'PT1M'
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+  'PT6H'
+  'PT12H'
+  'PT24H'
+  'P1D'
+])
+param windowSize string = 'PT15M'
+
+@description('How often the log alert is evaluated, represented in ISO 8601 duration format.')
+@allowed([
+  'PT5M'
+  'PT15M'
+  'PT30M'
+  'PT1H'
+])
+param evaluationFrequency string = 'PT5M'
+
+@description('The current date and time using the utcNow function. Used for deployment name uniqueness')
+param currentDateTimeUtcNow string = utcNow()
+
+@description('Opt-out switch for the customer usage identifier deployment used for telemetry purposes. The default value of No enables telemetry. The value of Yes disables telemetry.')
+@allowed([
+  'Yes'
+  'No'
+])
+param telemetryOptOut string = 'No'
+
+resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = {
+  name: alertName
+  location: resourceGroup().location
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    description: alertDescription
+    severity: alertSeverity
+    enabled: isEnabled
+    scopes: [
+      resourceId
+    ]
+    evaluationFrequency: evaluationFrequency
+    windowSize: windowSize
+    criteria: {
+      allOf: [
+        {
+          query: query
+          metricMeasureColumn: metricMeasureColumn
+          resourceIdColumn: resourceIdColumn
+          dimensions: [
+            {
+              name: 'Computer'
+              operator: 'Include'
+              values: ['*']
+            }]
+          operator: operator
+          threshold: threshold
+          timeAggregation: timeAggregation
+          failingPeriods: {
+            numberOfEvaluationPeriods: numberOfEvaluationPeriods
+            minFailingPeriodsToAlert: minFailingPeriodsToAlert
+          }
+        }
+      ]
+    }
+    muteActionsDuration: muteActionsDuration
+    autoMitigate: autoMitigate
+    checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured
+  }
+}
+
+var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}'
+resource ambaTelemetryPid 'Microsoft.Resources/deployments@2023-07-01' = if (telemetryOptOut == 'No') {
+  name: ambaTelemetryPidName
+  tags: {
+    _deployed_by_amba: 'true'
+  }
+  properties: {
+    mode: 'Incremental'
+    template: {
+      '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#'
+      contentVersion: '1.0.0.0'
+      resources: []
+    }
+  }
+}