From 6153eb520058ba71b1db1f4c72e589a2ce83e4f1 Mon Sep 17 00:00:00 2001 From: Cassie Kays <86622587+cassiekays@users.noreply.github.com> Date: Wed, 30 Oct 2024 14:37:47 -0700 Subject: [PATCH 1/3] changed two AOAI alerts to visible with the proper thresholds --- services/CognitiveServices/accounts/alerts.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/services/CognitiveServices/accounts/alerts.yaml b/services/CognitiveServices/accounts/alerts.yaml index 9d8a784db..9a28dcb26 100644 --- a/services/CognitiveServices/accounts/alerts.yaml +++ b/services/CognitiveServices/accounts/alerts.yaml @@ -343,18 +343,18 @@ description: Utilization % for a provisoned-managed deployment, calculated as (PTUs consumed / PTUs deployed) x 100. type: Metric verified: false - visible: false + visible: true tags: manual-ck properties: metricName: AzureOpenAIProvisionedManagedUtilizationV2 metricNamespace: Microsoft.CognitiveServices/accounts - severity: 0 + severity: 2 windowSize: PT5M evaluationFrequency: PT1M timeAggregation: Total operator: GreaterThan criterionType: StaticThresholdCriterion - threshold: 0.0 + threshold: 80 guid: 693a3b37-1e2a-42d1-aaed-b1f374276d1c - name: AzureOpenAIRequests description: Number of calls made to the Azure OpenAI API over a period of time. @@ -377,18 +377,18 @@ description: Recommended latency (responsiveness) measure for streaming requests. type: Metric verified: false - visible: false + visible: true tags: manual-ck properties: metricName: AzureOpenAITimeToResponse metricNamespace: Microsoft.CognitiveServices/accounts - severity: 0 + severity: 2 windowSize: PT5M evaluationFrequency: PT1M timeAggregation: Total operator: GreaterThan criterionType: StaticThresholdCriterion - threshold: 0.0 + threshold: 200ms guid: 995cc12a-1887-4669-92c5-70a6ca8bfe70 - name: BaselineEstimatorOverallReward description: Baseline Estimator Overall Reward. From befb971dfe3f4830a2c1c9f3d64bf1ddda9cd370 Mon Sep 17 00:00:00 2001 From: Cassie Kays <86622587+cassiekays@users.noreply.github.com> Date: Thu, 31 Oct 2024 11:33:19 -0700 Subject: [PATCH 2/3] changed PG verified to true; added token cache threshold : visible and verified --- services/CognitiveServices/accounts/alerts.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/services/CognitiveServices/accounts/alerts.yaml b/services/CognitiveServices/accounts/alerts.yaml index 9a28dcb26..da9fa66c6 100644 --- a/services/CognitiveServices/accounts/alerts.yaml +++ b/services/CognitiveServices/accounts/alerts.yaml @@ -308,19 +308,19 @@ - name: AzureOpenAIContextTokensCacheMatchRate description: Percentage of the prompt tokens hit the cache, avaiable for PTU-managed. type: Metric - verified: false - visible: false + verified: true + visible: true tags: manual-ck properties: metricName: AzureOpenAIContextTokensCacheMatchRate metricNamespace: Microsoft.CognitiveServices/accounts - severity: 0 + severity: 2 windowSize: PT5M evaluationFrequency: PT1M timeAggregation: Total operator: GreaterThan criterionType: StaticThresholdCriterion - threshold: 0.0 + threshold: 75 guid: 81f8369c-65bf-4194-bfd2-ffdfa2470577 - name: AzureOpenAIProvisionedManagedUtilization description: Utilization % for a provisoned-managed deployment, calculated as (PTUs consumed / PTUs deployed) x 100. @@ -342,7 +342,7 @@ - name: AzureOpenAIProvisionedManagedUtilizationV2 description: Utilization % for a provisoned-managed deployment, calculated as (PTUs consumed / PTUs deployed) x 100. type: Metric - verified: false + verified: true visible: true tags: manual-ck properties: @@ -376,7 +376,7 @@ - name: AzureOpenAITimeToResponse description: Recommended latency (responsiveness) measure for streaming requests. type: Metric - verified: false + verified: true visible: true tags: manual-ck properties: From c77f3422ca0f9eb1e000ba0bd0ab6d4a90b5ea22 Mon Sep 17 00:00:00 2001 From: Cassie Kays <86622587+cassiekays@users.noreply.github.com> Date: Thu, 31 Oct 2024 12:04:17 -0700 Subject: [PATCH 3/3] fixed text in threshold for time to respond metric --- services/CognitiveServices/accounts/alerts.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/CognitiveServices/accounts/alerts.yaml b/services/CognitiveServices/accounts/alerts.yaml index da9fa66c6..2105993ce 100644 --- a/services/CognitiveServices/accounts/alerts.yaml +++ b/services/CognitiveServices/accounts/alerts.yaml @@ -374,7 +374,7 @@ threshold: 0.0 guid: a1528d17-f288-46b1-b084-8b8fe3af90fa - name: AzureOpenAITimeToResponse - description: Recommended latency (responsiveness) measure for streaming requests. + description: Recommended latency (responsiveness) measure for streaming requests. Time in milliseconds. type: Metric verified: true visible: true @@ -388,7 +388,7 @@ timeAggregation: Total operator: GreaterThan criterionType: StaticThresholdCriterion - threshold: 200ms + threshold: 200 guid: 995cc12a-1887-4669-92c5-70a6ca8bfe70 - name: BaselineEstimatorOverallReward description: Baseline Estimator Overall Reward.