Skip to content

Commit

Permalink
add read latency sli for rds (#256)
Browse files Browse the repository at this point in the history
* feat: aws rds latency

* feat: aws rds latency - read

* feat: aws rds latency - read

* feat: aws rds latency - read and write

* feat: aws rds latency - read and write

* feat: aws rds latency - read and write

* feat: s latency - read and write

* feat: aws rds latency - read and write

* feat: aws rds latency - read and write added graph

* feat: aws rds latency -avg  added graph

* feat: aws rds latency -avg  added graph - tidyup

* feat: aws rds latency -avg  added graph - tidyup sum to count

* feat: aws rds latency -avg  added graph - tidyup sum to count

* feat: aws rds latency - fix to NaN average when NaN is scraped into Prometheus
  • Loading branch information
georgeowusuHO authored Sep 30, 2022
1 parent f92fb86 commit 7991743
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 4 deletions.
6 changes: 4 additions & 2 deletions monitoring-as-code/mixin-defs/testing-mixin.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,12 @@ local sliSpecList = {
},
},
SLI12: {
title: 'test',
title: 'test - aws_rds_read',
sliDescription: 'test',
period: '7d',
metricType: 'aws_rds_read',
evalInterval: '5m',
latencyTarget: 0.001,
selectors: {
product: 'test',
},
Expand All @@ -198,11 +199,12 @@ local sliSpecList = {
},
},
SLI13: {
title: 'test',
title: 'test - aws_rds_write',
sliDescription: 'test',
period: '7d',
metricType: 'aws_rds_write',
evalInterval: '5m',
latencyTarget: 0.001,
selectors: {
product: 'test',
},
Expand Down
4 changes: 2 additions & 2 deletions monitoring-as-code/src/metric-types.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@
},
sliTypesConfig: {
latency: {
library: (import 'sli-value-libraries/average-using-single-metric.libsonnet'),
library: (import 'sli-value-libraries/average-latency-using-seconds-target-metric.libsonnet'),
description: 'The average latency of %(sliDescription)s should be %(comparison)s %(metricTarget)0.1f',
targetMetrics: {
target: 'averageLatency',
Expand Down Expand Up @@ -437,7 +437,7 @@
},
sliTypesConfig: {
latency: {
library: (import 'sli-value-libraries/average-using-single-metric.libsonnet'),
library: (import 'sli-value-libraries/average-latency-using-seconds-target-metric.libsonnet'),
description: 'The average latency of %(sliDescription)s should be %(comparison)s %(metricTarget)0.1f',
targetMetrics: {
target: 'averageLatency',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Divides the count of target metric samples above latency target by the overall count of samples
// target metric samples taken from average-using-single-metric

// Target metrics:
// target - Metric to get the average value of over evaluation interval

// Additional config:
// latencyTarget in SLI spec

// MaC imports
local sliValueLibraryFunctions = import '../util/sli-value-library-functions.libsonnet';

// Grafana imports
local grafana = import 'grafonnet/grafana.libsonnet';
local prometheus = grafana.prometheus;
local graphPanel = grafana.graphPanel;

// Builds the recording rule that produces the SLI value for this latency SLI type.
// The expression compares the target metric against sliSpec.latencyTarget with `> bool`,
// averages that comparison over the evaluation interval, sums it per selector label set,
// and divides by a count of the series returned by count_over_time. The environment and
// product selector labels are then copied into sli_environment and sli_product before the
// original selector labels are aggregated away.
// @param sliSpec The spec for the SLI having its recording rules created
// @param sliMetadata Metadata about the type and category of the SLI
// @param config The config for the service defined in the mixin file
// @returns Array holding the single JSON object that defines the recording rule
local createSliValueRule(sliSpec, sliMetadata, config) =
  local metricConfig = sliValueLibraryFunctions.getMetricConfig(sliSpec);
  local labelsBySelector = sliValueLibraryFunctions.getSelectorLabels(metricConfig);

  // Values substituted into the PromQL template below
  local exprParams = {
    targetMetric: sliValueLibraryFunctions.getTargetMetrics(metricConfig, sliSpec).target,
    latencyTarget: sliSpec.latencyTarget,
    evalInterval: sliSpec.evalInterval,
    selectors: std.join(', ', sliValueLibraryFunctions.createRuleSelectors(metricConfig, sliSpec, config)),
    selectorLabels: std.join(', ', std.objectValues(labelsBySelector)),
    environmentSelectorLabel: labelsBySelector.environment,
    productSelectorLabel: labelsBySelector.product,
  };

  local sliValueExpr = |||
    sum without (%(selectorLabels)s) (label_replace(label_replace(
    (
    sum by(%(selectorLabels)s) (avg_over_time((%(targetMetric)s{%(selectors)s} > bool %(latencyTarget)s)[%(evalInterval)s:%(evalInterval)s]))
    /
    count by(%(selectorLabels)s) (count_over_time(%(targetMetric)s{%(selectors)s}[%(evalInterval)s]))
    ),
    "sli_environment", "$1", "%(environmentSelectorLabel)s", "(.*)"), "sli_product", "$1", "%(productSelectorLabel)s", "(.*)"))
  ||| % exprParams;

  [
    {
      record: 'sli_value',
      expr: sliValueExpr,
      labels: sliSpec.sliLabels + sliMetadata,
    },
  ];

// Creates Grafana dashboard graph panel
// The panel plots two series:
//   * 'avg latency' - the target metric averaged over the evaluation interval
//   * 'avg period where latency > <latencyTarget> seconds' - the averaged `> bool latencyTarget`
//     comparison divided by a count of the series returned by count_over_time
// Each series gets a colour override whose regex alias is derived from its legend, so the
// override and the legend cannot drift apart.
// @param sliSpec The spec for the SLI having its dashboard created
// @returns Grafana graph panel object
local createGraphPanel(sliSpec) =
  local metricConfig = sliValueLibraryFunctions.getMetricConfig(sliSpec);
  local dashboardSelectors = sliValueLibraryFunctions.createDashboardSelectors(metricConfig, sliSpec);
  local targetMetrics = sliValueLibraryFunctions.getTargetMetrics(metricConfig, sliSpec);

  // Legends are defined once and reused for the series overrides below
  local avgLatencyLegend = 'avg latency';
  local aboveTargetLegend = 'avg period where latency > %s seconds' % sliSpec.latencyTarget;

  // Values substituted into both PromQL templates; a template ignores keys it does not reference
  local queryParams = {
    targetMetric: targetMetrics.target,
    latencyTarget: sliSpec.latencyTarget,
    selectors: std.join(',', dashboardSelectors),
    evalInterval: sliSpec.evalInterval,
  };

  graphPanel.new(
    title='%s' % sliSpec.sliDescription,
    datasource='prometheus',
    description=|||
      * Sample interval is %(evalInterval)s
      * Selectors are %(selectors)s
    ||| % {
      // '~' is escaped so it is shown literally in the panel description
      selectors: std.strReplace(std.join(', ', sliValueLibraryFunctions.getSelectors(metricConfig, sliSpec)), '~', '\\~'),
      evalInterval: sliSpec.evalInterval,
    },
    min=0,
    fill=0,
  ).addTarget(
    prometheus.target(
      |||
        sum(avg_over_time(%(targetMetric)s{%(selectors)s}[%(evalInterval)s]) > 0 or vector(0))
      ||| % queryParams,
      legendFormat=avgLatencyLegend,
    ),
  ).addTarget(
    prometheus.target(
      |||
        sum(avg_over_time((%(targetMetric)s{%(selectors)s} > bool %(latencyTarget)s)[%(evalInterval)s:%(evalInterval)s]) or vector(0))
        /
        count(count_over_time(%(targetMetric)s{%(selectors)s}[%(evalInterval)s]))
      ||| % queryParams,
      legendFormat=aboveTargetLegend,
    )
  ).addSeriesOverride(
    {
      alias: '/%s/' % aboveTargetLegend,
      color: 'red',
    },
  ).addSeriesOverride(
    {
      // Previously '/avg latancy/', which never matched the 'avg latency' legend
      alias: '/%s/' % avgLatencyLegend,
      color: 'green',
    },
  );

// File exports
// Exposes the two local functions defined in this file under their own names.
{
  createSliValueRule: createSliValueRule,
  createGraphPanel: createGraphPanel,
}

0 comments on commit 7991743

Please sign in to comment.