Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add read latency sli for rds #256

Merged
merged 14 commits into from
Sep 30, 2022
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions monitoring-as-code/mixin-defs/testing-mixin.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,12 @@ local sliSpecList = {
},
},
SLI12: {
title: 'test',
title: 'test - aws_rds_read',
sliDescription: 'test',
period: '7d',
metricType: 'aws_rds_read',
evalInterval: '5m',
latencyTarget: 0.001,
selectors: {
product: 'test',
},
Expand All @@ -198,11 +199,12 @@ local sliSpecList = {
},
},
SLI13: {
title: 'test',
title: 'test - aws_rds_write',
sliDescription: 'test',
period: '7d',
metricType: 'aws_rds_write',
evalInterval: '5m',
latencyTarget: 0.001,
selectors: {
product: 'test',
},
Expand Down
4 changes: 2 additions & 2 deletions monitoring-as-code/src/metric-types.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@
},
sliTypesConfig: {
latency: {
library: (import 'sli-value-libraries/average-using-single-metric.libsonnet'),
library: (import 'sli-value-libraries/average-latency-using-seconds-target-metric.libsonnet'),
description: 'The average latency of %(sliDescription)s should be %(comparison)s %(metricTarget)0.1f',
targetMetrics: {
target: 'averageLatency',
Expand Down Expand Up @@ -437,7 +437,7 @@
},
sliTypesConfig: {
latency: {
library: (import 'sli-value-libraries/average-using-single-metric.libsonnet'),
library: (import 'sli-value-libraries/average-latency-using-seconds-target-metric.libsonnet'),
description: 'The average latency of %(sliDescription)s should be %(comparison)s %(metricTarget)0.1f',
targetMetrics: {
target: 'averageLatency',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Divides the sum of target metric samples above the latency target by the sum of count over time of
georgeowusuHO marked this conversation as resolved.
Show resolved Hide resolved
// target metric samples; taken from average-using-single-metric
georgeowusuHO marked this conversation as resolved.
Show resolved Hide resolved

// Target metrics:
// target - Metric to get the average value of over evaluation interval

// Additional config:
// latencyTarget in SLI spec

// MaC imports
local sliValueLibraryFunctions = import '../util/sli-value-library-functions.libsonnet';

// Grafana imports
local grafana = import 'grafonnet/grafana.libsonnet';
local prometheus = grafana.prometheus;
local graphPanel = grafana.graphPanel;

// Creates the custom SLI value rule
// @param sliSpec The spec for the SLI having its recording rules created
// @param sliMetadata Metadata about the type and category of the SLI
// @param config The config for the service defined in the mixin file
// @returns JSON defining the recording rule
local createSliValueRule(sliSpec, sliMetadata, config) =
  // Builds the single 'sli_value' recording rule for a latency SLI that is
  // judged against sliSpec.latencyTarget.
  local metricCfg = sliValueLibraryFunctions.getMetricConfig(sliSpec);
  local labelNames = sliValueLibraryFunctions.getSelectorLabels(metricCfg);

  // Values substituted into the PromQL template below
  local exprParams = {
    targetMetric: sliValueLibraryFunctions.getTargetMetrics(metricCfg, sliSpec).target,
    latencyTarget: sliSpec.latencyTarget,
    selectorLabels: std.join(', ', std.objectValues(labelNames)),
    environmentSelectorLabel: labelNames.environment,
    productSelectorLabel: labelNames.product,
    selectors: std.join(', ', sliValueLibraryFunctions.createRuleSelectors(metricCfg, sliSpec, config)),
    evalInterval: sliSpec.evalInterval,
  };

  // Numerator: per selector-label group, the summed average over the evaluation
  // interval of the 0/1 result of "target metric > latency target".
  // Denominator: per selector-label group, the number of series that have
  // samples in the evaluation interval.
  // The two label_replace calls copy the environment and product selector label
  // values into sli_environment and sli_product before the outer sum drops the
  // original selector labels.
  local exprTemplate = |||
    sum without (%(selectorLabels)s) (label_replace(label_replace(
    (
    sum by(%(selectorLabels)s) (avg_over_time((%(targetMetric)s{%(selectors)s} > bool %(latencyTarget)s)[%(evalInterval)s:%(evalInterval)s]))
    /
    count by(%(selectorLabels)s) (count_over_time(%(targetMetric)s{%(selectors)s}[%(evalInterval)s]))
    ),
    "sli_environment", "$1", "%(environmentSelectorLabel)s", "(.*)"), "sli_product", "$1", "%(productSelectorLabel)s", "(.*)"))
  |||;

  [
    {
      record: 'sli_value',
      expr: exprTemplate % exprParams,
      // SLI-specific labels combined with the SLI type/category metadata
      labels: sliSpec.sliLabels + sliMetadata,
    },
  ];

// Creates Grafana dashboard graph panel
// @param sliSpec The spec for the SLI having its dashboard created
// @returns Grafana graph panel object
local createGraphPanel(sliSpec) =
  // Builds the dashboard graph panel for a latency SLI. The panel plots two
  // series: the average of the target metric over the evaluation interval, and
  // the averaged 0/1 result of "target metric > latency target" divided by the
  // number of series with samples in the evaluation interval.
  local metricConfig = sliValueLibraryFunctions.getMetricConfig(sliSpec);
  local dashboardSelectors = sliValueLibraryFunctions.createDashboardSelectors(metricConfig, sliSpec);
  local targetMetrics = sliValueLibraryFunctions.getTargetMetrics(metricConfig, sliSpec);

  // Legends are defined once and reused for the series overrides below, so the
  // override regexes cannot drift from the legend text (the original override
  // for the average series was misspelt and never matched its legend).
  local avgLatencyLegend = 'avg latency';
  local aboveTargetLegend = 'avg period where latency > %s seconds' % sliSpec.latencyTarget;

  // Values substituted into both PromQL templates; the first template simply
  // does not reference latencyTarget.
  local queryParams = {
    targetMetric: targetMetrics.target,
    latencyTarget: sliSpec.latencyTarget,
    selectors: std.join(',', dashboardSelectors),
    evalInterval: sliSpec.evalInterval,
  };

  graphPanel.new(
    title='%s' % sliSpec.sliDescription,
    datasource='prometheus',
    description=|||
      * Sample interval is %(evalInterval)s
      * Selectors are %(selectors)s
    ||| % {
      // '~' is escaped before the selectors are placed in the panel description
      selectors: std.strReplace(std.join(', ', sliValueLibraryFunctions.getSelectors(metricConfig, sliSpec)), '~', '\\~'),
      evalInterval: sliSpec.evalInterval,
    },
    min=0,
    fill=0,
  ).addTarget(
    prometheus.target(
      |||
        sum(avg_over_time(%(targetMetric)s{%(selectors)s}[%(evalInterval)s]) or vector(0))
      ||| % queryParams,
      legendFormat=avgLatencyLegend,
    ),
  ).addTarget(
    prometheus.target(
      |||
        sum(avg_over_time((%(targetMetric)s{%(selectors)s} > bool %(latencyTarget)s)[%(evalInterval)s:%(evalInterval)s]) or vector(0))
        /
        count(count_over_time(%(targetMetric)s{%(selectors)s}[%(evalInterval)s]))
      ||| % queryParams,
      legendFormat=aboveTargetLegend,
    )
  ).addSeriesOverride(
    {
      alias: '/%s/' % aboveTargetLegend,
      color: 'red',
    },
  ).addSeriesOverride(
    {
      alias: '/%s/' % avgLatencyLegend,
      color: 'green',
    },
  );

// File exports
{
  // Recording rule generator exposed by this SLI value library
  createSliValueRule: createSliValueRule,
  // Dashboard graph panel generator exposed by this SLI value library
  createGraphPanel: createGraphPanel,
}