Skip to content

Commit

Permalink
feat: slo freshness dashboards
Browse files Browse the repository at this point in the history
  • Loading branch information
BrunoReboul committed Mar 22, 2021
1 parent 61cb194 commit f224b01
Show file tree
Hide file tree
Showing 10 changed files with 402 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@ func (instanceDeployment *InstanceDeployment) deployMonitoringDashboard() (err e
dashboardDeployment := mon.NewDashboardDeployment()
dashboardDeployment.Core = instanceDeployment.Core
dashboardDeployment.Settings.Instance.MON = instanceDeployment.Settings.Instance.MON
dashboardDeployment.Artifacts.Widgets = instanceDeployment.Artifacts.Widgets
dashboardDeployment.Artifacts = instanceDeployment.Artifacts
return dashboardDeployment.Deploy()
}
52 changes: 46 additions & 6 deletions services/setdashboards/meth_instancedeployment_situate.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,60 @@
package setdashboards

import (
"encoding/json"
"fmt"
"math"
"strings"

"github.com/BrunoReboul/ram/utilities/mon"
"google.golang.org/api/monitoring/v1"
)

// Situate complements the settings, taking into account the situation for service and instance settings
func (instanceDeployment *InstanceDeployment) Situate() (err error) {
instanceDeployment.Artifacts.Widgets = []*monitoring.Widget{}
for _, microserviceName := range instanceDeployment.Settings.Instance.MON.MicroServiceNameList {
for _, widgetType := range instanceDeployment.Settings.Instance.MON.WidgetTypeList {
widget, err := mon.GetGCFWidget(microserviceName, widgetType)
if err != nil {
return err
instanceDeployment.Artifacts.Tiles = []*monitoring.Tile{}
if instanceDeployment.Settings.Instance.MON.GridLayout.Columns != 0 {
for _, microserviceName := range instanceDeployment.Settings.Instance.MON.GridLayout.MicroServiceNameList {
for _, widgetType := range instanceDeployment.Settings.Instance.MON.GridLayout.WidgetTypeList {
widget, err := mon.GetGCFWidget(microserviceName, widgetType)
if err != nil {
return err
}
instanceDeployment.Artifacts.Widgets = append(instanceDeployment.Artifacts.Widgets, &widget)
}
instanceDeployment.Artifacts.Widgets = append(instanceDeployment.Artifacts.Widgets, &widget)
}
}
if instanceDeployment.Settings.Instance.MON.SLOFreshnessLayout.SLO != 0 {
grouwthFactor := math.Sqrt(2)
scale := 0.01
thresholdSeconds := scale * math.Pow(grouwthFactor, float64(instanceDeployment.Settings.Instance.MON.SLOFreshnessLayout.CutOffBucketNumber))
var thresholdText string
if thresholdSeconds < 60 {
thresholdText = fmt.Sprintf("%g seconds", math.Round(thresholdSeconds))
} else {
if thresholdSeconds < 60*60 {
thresholdText = fmt.Sprintf("%g minutes", math.Round(thresholdSeconds/60))
} else {
if thresholdSeconds < 60*60*60 {
thresholdText = fmt.Sprintf("%g hours", math.Round(thresholdSeconds/60/60))
}
}
}
slo := instanceDeployment.Settings.Instance.MON.SLOFreshnessLayout.SLO
sloText := fmt.Sprintf("%g%%", slo*100)
dashboardJSON := mon.SLOFreshnessTiles
dashboardJSON = strings.Replace(dashboardJSON, "<origin>", instanceDeployment.Settings.Instance.MON.SLOFreshnessLayout.Origin, -1)
dashboardJSON = strings.Replace(dashboardJSON, "<scope>", instanceDeployment.Settings.Instance.MON.SLOFreshnessLayout.Scope, -1)
dashboardJSON = strings.Replace(dashboardJSON, "<flow>", instanceDeployment.Settings.Instance.MON.SLOFreshnessLayout.Flow, -1)
dashboardJSON = strings.Replace(dashboardJSON, "<slo>", fmt.Sprintf("%g", slo), -1)
dashboardJSON = strings.Replace(dashboardJSON, "<lowerBound>", fmt.Sprintf("%g", math.Floor(slo*10)/10), -1)
dashboardJSON = strings.Replace(dashboardJSON, "<thresholdSeconds>", fmt.Sprintf("%v", thresholdSeconds), -1)
dashboardJSON = strings.Replace(dashboardJSON, "<thresholdText>", thresholdText, -1)
dashboardJSON = strings.Replace(dashboardJSON, "<sloText>", sloText, -1)
err = json.Unmarshal([]byte(dashboardJSON), &instanceDeployment.Artifacts.Tiles)
if err != nil {
return fmt.Errorf("json.Unmarshal SLOFreshnessTiles %v", err)
}
}
return nil
Expand Down
1 change: 1 addition & 0 deletions services/setdashboards/type_instancedeployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type InstanceDeployment struct {
DumpTimestamp time.Time `yaml:"dumpTimestamp"`
Artifacts struct {
Widgets []*monitoring.Widget
Tiles []*monitoring.Tile
}
Core *deploy.Core
Settings struct {
Expand Down
212 changes: 212 additions & 0 deletions utilities/mon/const_dashboardslofreshness.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the 'License');
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an 'AS IS' BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mon

// SLOFreshnessTiles is the JSON template of the freshness SLO dashboard tiles.
//
// It is a JSON array of tile objects (height, width, xPos, yPos, widget). The
// template is not valid JSON until every placeholder has been substituted:
//
//   - <scope>, <flow>, <sloText>, <thresholdText>: free text used in the title
//     and content of the first (text) tile.
//   - <origin>: value compared with metric.origin in every query.
//   - <thresholdSeconds>: argument of fraction_less_than_from in the SLI,
//     error budget and burn rate queries.
//   - <slo>, <lowerBound>: emitted unquoted, so they must be replaced by JSON
//     numbers.
//
// The three burn rate charts share one query shape and differ only by their
// sliding window (7d, 12h, 1h) and by their threshold line (1.5, 3, 9).
//
// NOTE(review): the error budget and burn rate queries divide by a fixed 0.01,
// which equals the error budget (1 - SLO) only when the SLO is 0.99 - confirm
// whether this should follow <slo>.
const SLOFreshnessTiles = `
[
{
"height": 2,
"width": 4,
"widget": {
"title": "<scope> <flow> <sloText> < <thresholdText>",
"text": {
"content": "**Freshness**: <sloText> of <scope> configurations from <flow> flow over the last 28 days should be analyzed in less than <thresholdText>.",
"format": "MARKDOWN"
}
}
},
{
"height": 2,
"width": 3,
"xPos": 4,
"widget": {
"title": "SLI vs SLO",
"scorecard": {
"gaugeView": {
"lowerBound": <lowerBound>,
"upperBound": 1.0
},
"thresholds": [
{
"color": "RED",
"direction": "BELOW",
"value": <slo>
}
],
"timeSeriesQuery": {
"timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n| filter metric.microservice_name == 'stream2bq'\n| filter metric.origin == '<origin>'\n| align delta(28d)\n| every 28d\n| within 28d\n| group_by [metric.microservice_name]\n| fraction_less_than_from <thresholdSeconds>"
}
}
}
},
{
"height": 2,
"width": 3,
"xPos": 7,
"widget": {
"title": "Remaining ERROR BUDGET",
"scorecard": {
"thresholds": [
{
"color": "YELLOW",
"direction": "BELOW",
"value": 0.1
}
],
"timeSeriesQuery": {
"timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n| filter metric.microservice_name == 'stream2bq'\n| filter metric.origin == '<origin>'\n| align delta(28d)\n| every 28d\n| within 28d\n| group_by [metric.microservice_name]\n| fraction_less_than_from <thresholdSeconds>\n| neg\n| add 1\n| div 0.01\n| neg\n| add 1"
}
}
}
},
{
"height": 2,
"width": 2,
"xPos": 10,
"widget": {
"title": "Configurations analyzed in 28 days",
"scorecard": {
"sparkChartView": {
"sparkChartType": "SPARK_LINE"
},
"timeSeriesQuery": {
"timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n| filter metric.microservice_name == 'stream2bq'\n| filter metric.origin == '<origin>'\n| align delta(28d)\n| every 28d\n| within 28d\n| group_by [metric.microservice_name]\n| count_from"
}
}
}
},
{
"height": 9,
"width": 3,
"xPos": 9,
"yPos": 2,
"widget": {
"title": "Last 28days heatmap",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"plotType": "HEATMAP",
"timeSeriesQuery": {
"timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n| filter (metric.microservice_name == 'stream2bq')\n| filter metric.origin == '<origin>'\n| align delta(28d)\n| every 28d\n| within 28d\n| group_by [metric.microservice_name]\n| graph_period 28d"
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
}
},
{
"height": 3,
"width": 9,
"yPos": 2,
"widget": {
"title": "Error budget burnrate on 7d sliding windows - Email when > 1.5",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"plotType": "LINE",
"timeSeriesQuery": {
"timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n|filter metric.microservice_name == 'stream2bq'\n| filter metric.origin == '<origin>'\n| align delta(1m)\n| every 1m\n| group_by [metric.microservice_name], sliding(7d)\n| fraction_less_than_from <thresholdSeconds>\n| neg\n| add 1\n| div 0.01\n| cast_units \"1\""
}
}
],
"thresholds": [
{
"value": 1.5
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
}
},
{
"height": 3,
"width": 9,
"yPos": 5,
"widget": {
"title": "Error budget burnrate on 12h sliding windows - Alert when > 3",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"plotType": "LINE",
"timeSeriesQuery": {
"timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n|filter metric.microservice_name == 'stream2bq'\n| filter metric.origin == '<origin>'\n| align delta(1m)\n| every 1m\n| group_by [metric.microservice_name], sliding(12h)\n| fraction_less_than_from <thresholdSeconds>\n| neg\n| add 1\n| div 0.01\n| cast_units \"1\""
}
}
],
"thresholds": [
{
"value": 3.0
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
}
},
{
"height": 3,
"width": 9,
"yPos": 8,
"widget": {
"title": "Error budget burnrate on 1h sliding windows - Alert when > 9",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"plotType": "LINE",
"timeSeriesQuery": {
"timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n| filter metric.microservice_name == 'stream2bq'\n| filter metric.origin == '<origin>'\n| align delta(1m)\n| every 1m\n| group_by [metric.microservice_name], sliding(1h)\n| fraction_less_than_from <thresholdSeconds>\n| neg\n| add 1\n| div 0.01\n| cast_units \"1\""
}
}
],
"thresholds": [
{
"value": 9.0
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
}
}
]
`
Loading

0 comments on commit f224b01

Please sign in to comment.