From f392e9ade58840d145eb8c0b156a97306f18bc32 Mon Sep 17 00:00:00 2001 From: Olivier Cervello Date: Tue, 18 May 2021 18:26:32 +0200 Subject: [PATCH 1/8] fix: Migrate sample configurations to v2 --- samples/.env.sample | 21 +++++ .../slo_gae_app_availability.yaml | 35 ++++++++ .../cloud_monitoring/slo_gae_app_latency.yaml | 24 ++++++ .../slo_lb_request_availability.yaml | 27 ++++++ .../slo_lb_request_latency.yaml | 25 ++++++ .../slo_pubsub_subscription_throughput.yaml | 25 ++++++ .../slo_gae_app_availability.yaml | 36 ++++++++ .../slo_gae_app_availability_basic.yaml | 20 +++++ .../slo_gae_app_latency.yaml | 24 ++++++ .../slo_gae_app_latency_basic.yaml | 21 +++++ .../slo_gke_app_availability_basic.yaml | 21 +++++ ...gke_app_availability_basic_deprecated.yaml | 23 +++++ .../slo_gke_app_latency_basic.yaml | 22 +++++ .../slo_gke_app_latency_basic_deprecated.yaml | 24 ++++++ .../slo_lb_request_availability.yaml | 27 ++++++ .../slo_lb_request_latency.yaml | 25 ++++++ samples/config.yaml | 85 +++++++++++++++++++ ...slo_custom_app_availability_query_sli.yaml | 41 ++++----- .../slo_custom_app_availability_ratio.yaml | 41 ++++----- .../slo_dd_app_availability_query_sli.yaml | 49 ++++------- .../slo_dd_app_availability_query_slo.yaml | 43 ++++------ .../slo_dd_app_availability_ratio.yaml | 50 ++++------- .../slo_dt_app_availability_ratio.yaml | 32 +++---- .../slo_dt_app_latency_threshold.yaml | 51 +++++------ samples/elasticsearch/slo_elk_test_ratio.yaml | 60 +++++-------- samples/error_budget_policy.yaml | 43 ---------- samples/error_budget_policy_ssm.yaml | 27 ------ ...o_prom_metrics_availability_query_sli.yaml | 49 ++++------- .../slo_prom_metrics_availability_ratio.yaml | 49 ++++------- ...prom_metrics_latency_distribution_cut.yaml | 45 ++++------ .../slo_prom_metrics_latency_query_sli.yaml | 64 ++++++-------- .../stackdriver/slo_gae_app_availability.yaml | 46 ---------- samples/stackdriver/slo_gae_app_latency.yaml | 35 -------- .../slo_lb_request_availability.yaml | 38 
--------- .../stackdriver/slo_lb_request_latency.yaml | 36 -------- .../slo_pubsub_subscription_throughput.yaml | 39 --------- .../slo_gae_app_availability.yaml | 44 ---------- .../slo_gae_app_availability_basic.yaml | 28 ------ .../slo_gae_app_latency.yaml | 32 ------- .../slo_gae_app_latency_basic.yaml | 29 ------- .../slo_gke_app_availability_basic.yaml | 29 ------- ...gke_app_availability_basic_deprecated.yaml | 31 ------- .../slo_gke_app_latency_basic.yaml | 30 ------- .../slo_gke_app_latency_basic_deprecated.yaml | 32 ------- .../slo_lb_request_availability.yaml | 35 -------- .../slo_lb_request_latency.yaml | 33 ------- ...zz_slo_pubsub_subscription_throughput.yaml | 35 -------- 47 files changed, 703 insertions(+), 978 deletions(-) create mode 100644 samples/.env.sample create mode 100644 samples/cloud_monitoring/slo_gae_app_availability.yaml create mode 100644 samples/cloud_monitoring/slo_gae_app_latency.yaml create mode 100644 samples/cloud_monitoring/slo_lb_request_availability.yaml create mode 100644 samples/cloud_monitoring/slo_lb_request_latency.yaml create mode 100644 samples/cloud_monitoring/slo_pubsub_subscription_throughput.yaml create mode 100644 samples/cloud_service_monitoring/slo_gae_app_availability.yaml create mode 100644 samples/cloud_service_monitoring/slo_gae_app_availability_basic.yaml create mode 100644 samples/cloud_service_monitoring/slo_gae_app_latency.yaml create mode 100644 samples/cloud_service_monitoring/slo_gae_app_latency_basic.yaml create mode 100644 samples/cloud_service_monitoring/slo_gke_app_availability_basic.yaml create mode 100644 samples/cloud_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml create mode 100644 samples/cloud_service_monitoring/slo_gke_app_latency_basic.yaml create mode 100644 samples/cloud_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml create mode 100644 samples/cloud_service_monitoring/slo_lb_request_availability.yaml create mode 100644 
samples/cloud_service_monitoring/slo_lb_request_latency.yaml create mode 100644 samples/config.yaml delete mode 100644 samples/error_budget_policy.yaml delete mode 100644 samples/error_budget_policy_ssm.yaml delete mode 100644 samples/stackdriver/slo_gae_app_availability.yaml delete mode 100644 samples/stackdriver/slo_gae_app_latency.yaml delete mode 100644 samples/stackdriver/slo_lb_request_availability.yaml delete mode 100644 samples/stackdriver/slo_lb_request_latency.yaml delete mode 100644 samples/stackdriver/slo_pubsub_subscription_throughput.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gae_app_availability.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gae_app_availability_basic.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gae_app_latency.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gae_app_latency_basic.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gke_app_availability_basic.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gke_app_latency_basic.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_lb_request_availability.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_lb_request_latency.yaml delete mode 100644 samples/stackdriver_service_monitoring/zzz_slo_pubsub_subscription_throughput.yaml diff --git a/samples/.env.sample b/samples/.env.sample new file mode 100644 index 00000000..bb90163d --- /dev/null +++ b/samples/.env.sample @@ -0,0 +1,21 @@ +export STACKDRIVER_HOST_PROJECT_ID= +export GAE_PROJECT_ID= +export GAE_MODULE_ID= +export LB_PROJECT_ID= +export PUBSUB_PROJECT_ID= +export PUBSUB_TOPIC_NAME= +export GKE_PROJECT_ID= +export GKE_LOCATION= +export GKE_CLUSTER_NAME= +export 
GKE_SERVICE_NAMESPACE= +export GKE_SERVICE_NAME= +export GKE_MESH_UID= +export ELASTICSEARCH_URL= +export PROMETHEUS_URL= +export PROMETHEUS_PUSHGATEWAY_URL= +export DATADOG_SLO_ID= +export DATADOG_API_KEY= +export DATADOG_APP_KEY= +export DYNATRACE_API_URL= +export DYNATRACE_API_TOKEN= +export BIGQUERY_PROJECT_ID= diff --git a/samples/cloud_monitoring/slo_gae_app_availability.yaml b/samples/cloud_monitoring/slo_gae_app_availability.yaml new file mode 100644 index 00000000..675f290f --- /dev/null +++ b/samples/cloud_monitoring/slo_gae_app_availability.yaml @@ -0,0 +1,35 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gae-app-availability + labels: + service_name: gae + feature_name: app + slo_name: availability +spec: + description: Availability of App Engine app + backend: cloud_monitoring + method: good_bad_ratio + exporters: + - cloud_monitoring + service_level_indicator: + filter_good: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_count" + resource.type="gae_app" + ( metric.labels.response_code = 429 OR + metric.labels.response_code = 200 OR + metric.labels.response_code = 201 OR + metric.labels.response_code = 202 OR + metric.labels.response_code = 203 OR + metric.labels.response_code = 204 OR + metric.labels.response_code = 205 OR + metric.labels.response_code = 206 OR + metric.labels.response_code = 207 OR + metric.labels.response_code = 208 OR + metric.labels.response_code = 226 OR + metric.labels.response_code = 304 ) + filter_valid: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_count" + goal: 0.95 diff --git a/samples/cloud_monitoring/slo_gae_app_latency.yaml b/samples/cloud_monitoring/slo_gae_app_latency.yaml new file mode 100644 index 00000000..3f0bf38d --- /dev/null +++ b/samples/cloud_monitoring/slo_gae_app_latency.yaml @@ -0,0 +1,24 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: 
gae-app-latency724ms + labels: + service_name: gae + feature_name: app + slo_name: latency724ms +spec: + description: Latency of App Engine app requests < 724ms + backend: cloud_monitoring + method: distribution_cut + exporters: + - cloud_monitoring + service_level_indicator: + filter_valid: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_latencies" + resource.type="gae_app" + metric.labels.response_code >= 200 + metric.labels.response_code < 500 + good_below_threshold: true + threshold_bucket: 19 + goal: 0.999 diff --git a/samples/cloud_monitoring/slo_lb_request_availability.yaml b/samples/cloud_monitoring/slo_lb_request_availability.yaml new file mode 100644 index 00000000..0a9b6f57 --- /dev/null +++ b/samples/cloud_monitoring/slo_lb_request_availability.yaml @@ -0,0 +1,27 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: lb-request-availability + labels: + service_name: lb + feature_name: request + slo_name: availability +spec: + description: Availability of HTTP Load Balancer + backend: cloud_monitoring + method: good_bad_ratio + exporters: + - cloud_monitoring + service_level_indicator: + filter_good: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/request_count" + resource.type="https_lb_rule" + ( metric.label.response_code_class="200" OR + metric.label.response_code_class="300" OR + metric.label.response_code_class="400" ) + filter_valid: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/request_count" + resource.type="https_lb_rule" + goal: 0.98 diff --git a/samples/cloud_monitoring/slo_lb_request_latency.yaml b/samples/cloud_monitoring/slo_lb_request_latency.yaml new file mode 100644 index 00000000..56df0de2 --- /dev/null +++ b/samples/cloud_monitoring/slo_lb_request_latency.yaml @@ -0,0 +1,25 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: lb-request-latency724ms + labels: + service_name: 
lb + feature_name: request + slo_name: latency724ms +spec: + description: Latency of HTTP Load Balancer < 724ms + backend: cloud_monitoring + method: distribution_cut + exporters: + - cloud_monitoring + service_level_indicator: + filter_valid: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/total_latencies" + resource.type="https_lb_rule" + ( metric.label.response_code_class="200" OR + metric.label.response_code_class="300" OR + metric.label.response_code_class="400" ) + good_below_threshold: true + threshold_bucket: 19 + goal: 0.98 diff --git a/samples/cloud_monitoring/slo_pubsub_subscription_throughput.yaml b/samples/cloud_monitoring/slo_pubsub_subscription_throughput.yaml new file mode 100644 index 00000000..1e2721c5 --- /dev/null +++ b/samples/cloud_monitoring/slo_pubsub_subscription_throughput.yaml @@ -0,0 +1,25 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: pubsub-subscription-throughput + labels: + service_name: pubsub + feature_name: subscription + slo_name: throughput +spec: + description: Throughput of Pub/Sub subscription + backend: cloud_monitoring + method: good_bad_ratio + exporters: + - cloud_monitoring + - pubsub + service_level_indicator: + filter_good: > + project="${PUBSUB_PROJECT_ID}" + metric.type="pubsub.googleapis.com/subscription/ack_message_count" + resource.type="pubsub_subscription" + filter_bad: > + project="${PUBSUB_PROJECT_ID}" + metric.type="pubsub.googleapis.com/subscription/num_outstanding_messages" + resource.type="pubsub_subscription" + goal: 0.95 diff --git a/samples/cloud_service_monitoring/slo_gae_app_availability.yaml b/samples/cloud_service_monitoring/slo_gae_app_availability.yaml new file mode 100644 index 00000000..a2e97de3 --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gae_app_availability.yaml @@ -0,0 +1,36 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gae-app-availability + labels: + service_name: gae + 
feature_name: app + slo_name: availability +spec: + description: Availability of App Engine app + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: good_bad_ratio + exporters: [] + service_level_indicator: + filter_good: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_count" + resource.type="gae_app" + ( metric.labels.response_code = 429 OR + metric.labels.response_code = 200 OR + metric.labels.response_code = 201 OR + metric.labels.response_code = 202 OR + metric.labels.response_code = 203 OR + metric.labels.response_code = 204 OR + metric.labels.response_code = 205 OR + metric.labels.response_code = 206 OR + metric.labels.response_code = 207 OR + metric.labels.response_code = 208 OR + metric.labels.response_code = 226 OR + metric.labels.response_code = 304 ) + filter_valid: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_count" + resource.type="gae_app" + goal: 0.95 diff --git a/samples/cloud_service_monitoring/slo_gae_app_availability_basic.yaml b/samples/cloud_service_monitoring/slo_gae_app_availability_basic.yaml new file mode 100644 index 00000000..dd6507ff --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gae_app_availability_basic.yaml @@ -0,0 +1,20 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gae-app-availability + labels: + service_name: gae + feature_name: app + slo_name: availability +spec: + description: Availability of App Engine app + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: basic + exporters: [] + service_level_indicator: + app_engine: + project_id: ${GAE_PROJECT_ID} + module_id: ${GAE_MODULE_ID} + availability: {} + goal: 0.98 diff --git a/samples/cloud_service_monitoring/slo_gae_app_latency.yaml b/samples/cloud_service_monitoring/slo_gae_app_latency.yaml new file mode 100644 index 00000000..c949fb88 --- /dev/null +++ 
b/samples/cloud_service_monitoring/slo_gae_app_latency.yaml @@ -0,0 +1,24 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gae-app-latency724ms + labels: + service_name: gae + feature_name: app + slo_name: latency724ms +spec: + description: Latency of App Engine app requests < 724ms + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: distribution_cut + exporters: [] + service_level_indicator: + filter_valid: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_latencies" + resource.type="gae_app" + metric.labels.response_code >= 200 + metric.labels.response_code < 500 + range_min: 0 + range_max: 724 + goal: 0.999 diff --git a/samples/cloud_service_monitoring/slo_gae_app_latency_basic.yaml b/samples/cloud_service_monitoring/slo_gae_app_latency_basic.yaml new file mode 100644 index 00000000..e648e99d --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gae_app_latency_basic.yaml @@ -0,0 +1,21 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gae-app-latency724ms + labels: + service_name: gae + feature_name: app + slo_name: latency724ms +spec: + description: Latency of App Engine app requests < 724ms + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: basic + exporters: [] + service_level_indicator: + app_engine: + project_id: ${GAE_PROJECT_ID} + module_id: ${GAE_MODULE_ID} + latency: + threshold: 724 # ms + goal: 0.999 diff --git a/samples/cloud_service_monitoring/slo_gke_app_availability_basic.yaml b/samples/cloud_service_monitoring/slo_gke_app_availability_basic.yaml new file mode 100644 index 00000000..74c32976 --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gke_app_availability_basic.yaml @@ -0,0 +1,21 @@ +# apiVersion: sre.google.com/v2 +# kind: ServiceLevelObjective +# metadata: +# name: gke-service-availability +# labels: +# service_name: gke +# feature_name: 
service +# slo_name: availability +# spec: +# description: Availability of GKE service +# error_budget_policy: cloud_service_monitoring +# backend: cloud_service_monitoring +# method: basic +# exporters: [] +# service_level_indicator: +# mesh_istio: +# mesh_uid: ${GKE_MESH_UID} +# service_namespace: ${GKE_SERVICE_NAMESPACE} +# service_name: ${GKE_SERVICE_NAME} +# availability: {} +# goal: 0.98 diff --git a/samples/cloud_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml b/samples/cloud_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml new file mode 100644 index 00000000..ffa2c56e --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml @@ -0,0 +1,23 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gke-service-availability + labels: + service_name: gke + feature_name: service + slo_name: availability +spec: + description: Availability of GKE service + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: basic + exporters: [] + service_level_indicator: + cluster_istio: + project_id: ${GKE_PROJECT_ID} + location: ${GKE_LOCATION} + cluster_name: ${GKE_CLUSTER_NAME} + service_namespace: ${GKE_SERVICE_NAMESPACE} + service_name: ${GKE_SERVICE_NAME} + availability: {} + goal: 0.98 diff --git a/samples/cloud_service_monitoring/slo_gke_app_latency_basic.yaml b/samples/cloud_service_monitoring/slo_gke_app_latency_basic.yaml new file mode 100644 index 00000000..b0152afb --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gke_app_latency_basic.yaml @@ -0,0 +1,22 @@ +# apiVersion: sre.google.com/v2 +# kind: ServiceLevelObjective +# metadata: +# name: gke-service-latency724ms +# labels: +# service_name: gke +# feature_name: service +# slo_name: latency724ms +# spec: +# description: Latency of GKE service requests < 724ms +# error_budget_policy: cloud_service_monitoring +# backend: cloud_service_monitoring +# method: basic +# 
exporters: [] +# service_level_indicator: +# mesh_istio: +# mesh_uid: ${GKE_MESH_UID} +# service_namespace: ${GKE_SERVICE_NAMESPACE} +# service_name: ${GKE_SERVICE_NAME} +# latency: +# threshold: 724 # ms +# goal: 0.999 diff --git a/samples/cloud_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml b/samples/cloud_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml new file mode 100644 index 00000000..4c001c38 --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml @@ -0,0 +1,24 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gke-service-latency724ms + labels: + service_name: gke + feature_name: service + slo_name: latency724ms +spec: + description: Latency of GKE service requests < 724ms + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: basic + exporters: [] + service_level_indicator: + cluster_istio: + project_id: ${GKE_PROJECT_ID} + location: ${GKE_LOCATION} + cluster_name: ${GKE_CLUSTER_NAME} + service_namespace: ${GKE_SERVICE_NAMESPACE} + service_name: ${GKE_SERVICE_NAME} + latency: + threshold: 724 # ms + goal: 0.999 diff --git a/samples/cloud_service_monitoring/slo_lb_request_availability.yaml b/samples/cloud_service_monitoring/slo_lb_request_availability.yaml new file mode 100644 index 00000000..6bc1df9c --- /dev/null +++ b/samples/cloud_service_monitoring/slo_lb_request_availability.yaml @@ -0,0 +1,27 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: lb-request-availability + labels: + service_name: lb + feature_name: request + slo_name: availability +spec: + description: Availability of HTTP Load Balancer + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: good_bad_ratio + exporters: [] + service_level_indicator: + filter_good: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/request_count" + resource.type="https_lb_rule" 
+ ( metric.label.response_code_class="200" OR + metric.label.response_code_class="300" OR + metric.label.response_code_class="400" ) + filter_valid: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/request_count" + resource.type="https_lb_rule" + goal: 0.98 diff --git a/samples/cloud_service_monitoring/slo_lb_request_latency.yaml b/samples/cloud_service_monitoring/slo_lb_request_latency.yaml new file mode 100644 index 00000000..2b326176 --- /dev/null +++ b/samples/cloud_service_monitoring/slo_lb_request_latency.yaml @@ -0,0 +1,25 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: lb-request-latency724ms + labels: + service_name: lb + feature_name: request + slo_name: latency724ms +spec: + description: Latency of HTTP Load Balancer < 724ms + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: distribution_cut + exporters: [] + service_level_indicator: + filter_valid: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/total_latencies" + resource.type="https_lb_rule" + ( metric.label.response_code_class="200" OR + metric.label.response_code_class="300" OR + metric.label.response_code_class="400" ) + range_min: 0 + range_max: 724 # ms + goal: 0.98 diff --git a/samples/config.yaml b/samples/config.yaml new file mode 100644 index 00000000..58661ceb --- /dev/null +++ b/samples/config.yaml @@ -0,0 +1,85 @@ +--- +backends: + cloud_monitoring: + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + cloud_service_monitoring: + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + custom.custom_backend.CustomBackend: {} + datadog: + api_key: ${DATADOG_API_KEY} + app_key: ${DATADOG_APP_KEY} + dynatrace: + api_url: ${DYNATRACE_API_URL} + api_token: ${DYNATRACE_API_TOKEN} + elasticsearch: + url: ${ELASTICSEARCH_URL} + prometheus: + url: ${PROMETHEUS_URL} + +exporters: + bigquery: + project_id: ${BIGQUERY_PROJECT_ID} + dataset_id: slos + table_id: reports + 
cloud_monitoring: + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + custom.custom_exporter.CustomMetricExporter: {} + datadog: + api_key: ${DATADOG_API_KEY} + app_key: ${DATADOG_APP_KEY} + dynatrace: + api_url: ${DYNATRACE_API_URL} + api_token: ${DYNATRACE_API_TOKEN} + prometheus: + url: ${PROMETHEUS_PUSHGATEWAY_URL} + pubsub: + project_id: ${PUBSUB_PROJECT_ID} + topic_name: ${PUBSUB_TOPIC_NAME} + +error_budget_policies: + default: + steps: + - name: 1 hour + burn_rate_threshold: 9 + alert: true + message_alert: Page to defend the SLO + message_ok: Last hour on track + window: 3600 + - name: 12 hours + burn_rate_threshold: 3 + alert: true + message_alert: Page to defend the SLO + message_ok: Last 12 hours on track + window: 43200 + - name: 7 days + burn_rate_threshold: 1.5 + alert: false + message_alert: Dev team dedicates 25% of engineers to the reliability backlog + message_ok: Last week on track + window: 604800 + - name: 28 days + burn_rate_threshold: 1 + alert: false + message_alert: Freeze release, unless related to reliability or security + message_ok: Unfreeze release, per the agreed roll-out policy + window: 2419200 + cloud_service_monitoring: + steps: + - name: 24 hours + burn_rate_threshold: 3 + alert: true + message_alert: Page to defend the SLO + message_ok: Last 24 hours on track + window: 86400 + - name: 7 days + burn_rate_threshold: 1.5 + alert: false + message_alert: Dev team dedicates 25% of engineers to the reliability backlog + message_ok: Last week on track + window: 604800 + - name: 28 days + burn_rate_threshold: 1 + alert: false + message_alert: Freeze release, unless related to reliability or security + message_ok: Unfreeze release, per the agreed roll-out policy + window: 2419200 diff --git a/samples/custom/slo_custom_app_availability_query_sli.yaml b/samples/custom/slo_custom_app_availability_query_sli.yaml index acfb64dd..858a439a 100644 --- a/samples/custom/slo_custom_app_availability_query_sli.yaml +++ 
b/samples/custom/slo_custom_app_availability_query_sli.yaml @@ -1,25 +1,16 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: custom -feature_name: test -slo_description: 99.99% of fake requests to custom backends are valid -slo_name: availability-sli -slo_target: 0.999 -backend: - class: custom.custom_backend.CustomBackend - method: query_sli -exporters: -- class: custom.custom_exporter.CustomMetricExporter - class: custom.custom_exporter.CustomSLOExporter +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: custom-test-availability-sli + labels: + service_name: custom + feature_name: test + slo_name: availability-sli +spec: + description: 99.9% of fake requests to custom backends are valid + backend: custom.custom_backend.CustomBackend + method: query_sli + exporters: + - custom.custom_exporter.CustomMetricExporter + service_level_indicator: {} + goal: 0.999 diff --git a/samples/custom/slo_custom_app_availability_ratio.yaml b/samples/custom/slo_custom_app_availability_ratio.yaml index 6ba841f5..dec63465 100644 --- a/samples/custom/slo_custom_app_availability_ratio.yaml +++ b/samples/custom/slo_custom_app_availability_ratio.yaml @@ -1,25 +1,16 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: custom -feature_name: test -slo_description: 99.99% of fake requests to custom backends are valid -slo_name: availability-ratio -slo_target: 0.999 -backend: - class: custom.custom_backend.CustomBackend - method: good_bad_ratio -exporters: -- class: custom.custom_exporter.CustomMetricExporter - class: custom.custom_exporter.CustomSLOExporter +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: custom-test-availability-ratio + labels: + service_name: custom + feature_name: test + slo_name: availability-ratio +spec: + description: 99.9% of fake requests to custom backends are valid + backend: custom.custom_backend.CustomBackend + method: good_bad_ratio + exporters: + - custom.custom_exporter.CustomMetricExporter + service_level_indicator: {} + goal: 0.999 diff --git a/samples/datadog/slo_dd_app_availability_query_sli.yaml b/samples/datadog/slo_dd_app_availability_query_sli.yaml index 47ad78b5..2fe9ed36 100644 --- a/samples/datadog/slo_dd_app_availability_query_sli.yaml +++ b/samples/datadog/slo_dd_app_availability_query_sli.yaml @@ -1,31 +1,18 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: dd -feature_name: app -slo_name: availability -slo_description: 99% of app requests return a valid HTTP code -slo_target: 0.99 -backend: - class: Datadog - method: query_sli - api_key: ${DATADOG_API_KEY} - app_key: ${DATADOG_APP_KEY} - # api_host: api.datadoghq.eu # uncomment to use EU site - measurement: - query: sum:app.requests.count{http.path:/, http.status_code_class:2xx}.as_count() / sum:app.requests.count{http.path:/}.as_count() -exporters: - - class: Datadog - api_key: ${DATADOG_API_KEY} - app_key: ${DATADOG_APP_KEY} +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: dd-app-availability + labels: + service_name: dd + feature_name: app + slo_name: availability +spec: + description: 99% of app requests return a valid HTTP code + backend: datadog + method: query_sli + exporters: + - datadog + service_level_indicator: + query: sum:app.requests.count{http.path:/, http.status_code_class:2xx}.as_count() + / sum:app.requests.count{http.path:/}.as_count() + goal: 0.99 diff --git a/samples/datadog/slo_dd_app_availability_query_slo.yaml b/samples/datadog/slo_dd_app_availability_query_slo.yaml index bc1c5b19..3465a64d 100644 --- a/samples/datadog/slo_dd_app_availability_query_slo.yaml +++ b/samples/datadog/slo_dd_app_availability_query_slo.yaml @@ -1,27 +1,16 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: dd -feature_name: app -slo_name: availability -slo_description: 99% of app requests return a valid HTTP code -slo_target: 0.99 -backend: - class: Datadog - method: query_slo - api_key: ${DATADOG_API_KEY} - app_key: ${DATADOG_APP_KEY} - # api_host: api.datadoghq.eu # uncomment to use EU site - measurement: - slo_id: ${DATADOG_SLO_ID} +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: dd-app-availability + labels: + service_name: dd + feature_name: app + slo_name: availability +spec: + description: 99% of app requests return a valid HTTP code + backend: datadog + method: query_slo + exporters: [] + service_level_indicator: + slo_id: ${DATADOG_SLO_ID} + goal: 0.99 diff --git a/samples/datadog/slo_dd_app_availability_ratio.yaml b/samples/datadog/slo_dd_app_availability_ratio.yaml index e524b23d..aa8be867 100644 --- a/samples/datadog/slo_dd_app_availability_ratio.yaml +++ b/samples/datadog/slo_dd_app_availability_ratio.yaml @@ -1,32 +1,18 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: dd -feature_name: app -slo_name: availability -slo_description: 99% of app requests return a valid HTTP code -slo_target: 0.99 -backend: - class: Datadog - method: good_bad_ratio - api_key: ${DATADOG_API_KEY} - app_key: ${DATADOG_APP_KEY} - # api_host: api.datadoghq.eu # uncomment to use EU site - measurement: - query_good: app.requests.count{http.path:/, http.status_code_class:2xx} - query_valid: app.requests.count{http.path:/} -exporters: - - class: Datadog - api_key: ${DATADOG_API_KEY} - app_key: ${DATADOG_APP_KEY} +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: dd-app-availability + labels: + service_name: dd + feature_name: app + slo_name: availability +spec: + description: 99% of app requests return a valid HTTP code + backend: datadog + method: good_bad_ratio + exporters: + - datadog + service_level_indicator: + query_good: app.requests.count{http.path:/, http.status_code_class:2xx} + query_valid: app.requests.count{http.path:/} + goal: 0.99 diff --git a/samples/dynatrace/slo_dt_app_availability_ratio.yaml b/samples/dynatrace/slo_dt_app_availability_ratio.yaml index ac705550..af53cd27 100644 --- a/samples/dynatrace/slo_dt_app_availability_ratio.yaml +++ b/samples/dynatrace/slo_dt_app_availability_ratio.yaml @@ -12,25 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
--- -service_name: dt -feature_name: app -slo_name: availability -slo_description: 99.9% of app requests return a good HTTP code -slo_target: 0.999 -backend: - class: Dynatrace - method: good_bad_ratio - api_url: ${DYNATRACE_API_URL} - api_token: ${DYNATRACE_API_TOKEN} - measurement: +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: dt-app-availability + labels: + service_name: dt + feature_name: app + slo_name: availability +spec: + description: 99.9% of app requests return a good HTTP code + backend: dynatrace + method: good_bad_ratio + exporters: + - dynatrace + service_level_indicator: query_good: metric_selector: ext:app.request_count:filter(and(eq(app,test_app),eq(env,prod),eq(status_code_class,2xx))) entity_selector: type(HOST) query_valid: metric_selector: ext:app.request_count:filter(and(eq(app,test_app),eq(env,prod))) entity_selector: type(HOST) -exporters: -- class: Dynatrace - api_url: ${DYNATRACE_API_URL} - api_token: ${DYNATRACE_API_TOKEN} - metric_timeseries_id: custom:slo.error_budget_burn_rate + goal: 0.999 diff --git a/samples/dynatrace/slo_dt_app_latency_threshold.yaml b/samples/dynatrace/slo_dt_app_latency_threshold.yaml index bd97bb3d..d12eda9a 100644 --- a/samples/dynatrace/slo_dt_app_latency_threshold.yaml +++ b/samples/dynatrace/slo_dt_app_latency_threshold.yaml @@ -1,33 +1,20 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: dt -feature_name: app -slo_name: latency -slo_description: 99.9% of app 2xx requests return within 50ms -slo_target: 0.999 -backend: - class: Dynatrace - method: threshold - api_url: ${DYNATRACE_API_URL} - api_token: ${DYNATRACE_API_TOKEN} - measurement: +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: dt-app-latency + labels: + service_name: dt + feature_name: app + slo_name: latency +spec: + description: 99.9% of app 2xx requests return within 50ms + backend: dynatrace + method: threshold + exporters: + - dynatrace + service_level_indicator: query_valid: - metric_selector: ext:app.request_latency:filter(and(eq(app,test_app),eq(env,prod),eq(status_code_class,2xx))) - entity_selector: type(HOST) - threshold: 50000 # us -exporters: -- class: Dynatrace - api_url: ${DYNATRACE_API_URL} - api_token: ${DYNATRACE_API_TOKEN} + metric_selector: ext:app.request_latency:filter(and(eq(app,test_app),eq(env,prod),eq(status_code_class,2xx))) + entity_selector: type(HOST) + threshold: 50000 # us + goal: 0.999 diff --git a/samples/elasticsearch/slo_elk_test_ratio.yaml b/samples/elasticsearch/slo_elk_test_ratio.yaml index 2114f902..3c3be695 100644 --- a/samples/elasticsearch/slo_elk_test_ratio.yaml +++ b/samples/elasticsearch/slo_elk_test_ratio.yaml @@ -1,41 +1,27 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: elk -feature_name: test -slo_description: > - SLO for random test data generated with the - https://github.com/oliver006/elasticsearch-test-data -slo_name: errors -slo_target: 1 -backend: - class: Elasticsearch - url: ${ELASTICSEARCH_URL} - method: good_bad_ratio - measurement: - index: test_data - date_field: last_updated - query_good: {} +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: elk-test-errors + labels: + service_name: elk + feature_name: test + slo_name: errors +spec: + description: > + SLO for random test data generated with the + https://github.com/oliver006/elasticsearch-test-data + backend: elasticsearch + method: good_bad_ratio + exporters: + - pubsub + - cloud_monitoring + service_level_indicator: + index: test_data + date_field: last_updated + query_good: {} query_bad: must: term: - name: JAgOZE8 + name: JAgOZE8 -exporters: -- class: Pubsub - project_id: ${PUBSUB_PROJECT_ID} - topic_name: ${PUBSUB_TOPIC_NAME} - -- class: Stackdriver - project_id: ${PUBSUB_PROJECT_ID} + goal: 1 diff --git a/samples/error_budget_policy.yaml b/samples/error_budget_policy.yaml deleted file mode 100644 index d491fc97..00000000 --- a/samples/error_budget_policy.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -- error_budget_policy_step_name: 1 hour - measurement_window_seconds: 3600 - alerting_burn_rate_threshold: 9 - urgent_notification: true - overburned_consequence_message: Page to defend the SLO - achieved_consequence_message: Last hour on track - -- error_budget_policy_step_name: 12 hours - measurement_window_seconds: 43200 - alerting_burn_rate_threshold: 3 - urgent_notification: true - overburned_consequence_message: Page to defend the SLO - achieved_consequence_message: Last 12 hours on track - -- error_budget_policy_step_name: 7 days - measurement_window_seconds: 604800 - alerting_burn_rate_threshold: 1.5 - urgent_notification: false - overburned_consequence_message: Dev team dedicates 25% of engineers to the - reliability backlog - achieved_consequence_message: Last week on track - -- error_budget_policy_step_name: 28 days - measurement_window_seconds: 2419200 - alerting_burn_rate_threshold: 1 - urgent_notification: false - overburned_consequence_message: Freeze release, unless related to reliability - or security - achieved_consequence_message: Unfreeze release, per the agreed roll-out policy diff --git a/samples/error_budget_policy_ssm.yaml b/samples/error_budget_policy_ssm.yaml deleted file mode 100644 index 9abeef1b..00000000 --- a/samples/error_budget_policy_ssm.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -- error_budget_policy_step_name: 24 hours - measurement_window_seconds: 86400 - alerting_burn_rate_threshold: 4 - urgent_notification: true - overburned_consequence_message: Page to defend the SLO - achieved_consequence_message: Last 24 hours on track - -- error_budget_policy_step_name: 48 hours - measurement_window_seconds: 172800 - alerting_burn_rate_threshold: 2 - urgent_notification: true - overburned_consequence_message: Page to defend the SLO - achieved_consequence_message: Last 48 hours on track diff --git a/samples/prometheus/slo_prom_metrics_availability_query_sli.yaml b/samples/prometheus/slo_prom_metrics_availability_query_sli.yaml index 05076a04..7f4f6718 100644 --- a/samples/prometheus/slo_prom_metrics_availability_query_sli.yaml +++ b/samples/prometheus/slo_prom_metrics_availability_query_sli.yaml @@ -1,35 +1,20 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: prom -feature_name: metrics -slo_name: availability -slo_description: 99.9% of Prometheus requests return a good HTTP code -slo_target: 0.999 -backend: - class: Prometheus - method: query_sli - url: ${PROMETHEUS_URL} - # Basic auth example: - # headers: - # Content-Type: application/json - # Authorization: Basic b2s6cGFzcW== # username:password base64-encoded - measurement: - expression: > +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: prom-metrics-availability + labels: + service_name: prom + feature_name: metrics + slo_name: availability +spec: + description: 99.9% of Prometheus requests return a good HTTP code + backend: prometheus + method: query_sli + exporters: + - prometheus + service_level_indicator: + expression: > sum(rate(prometheus_http_requests_total{handler="/metrics", code=~"2.."}[window])) / sum(rate(prometheus_http_requests_total{handler="/metrics"}[window])) -exporters: -- class: Prometheus - url: ${PROMETHEUS_PUSHGATEWAY_URL} + goal: 0.999 diff --git a/samples/prometheus/slo_prom_metrics_availability_ratio.yaml b/samples/prometheus/slo_prom_metrics_availability_ratio.yaml index 23b5171e..7e4db0cf 100644 --- a/samples/prometheus/slo_prom_metrics_availability_ratio.yaml +++ b/samples/prometheus/slo_prom_metrics_availability_ratio.yaml @@ -1,34 +1,19 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: prom -feature_name: metrics -slo_name: availability -slo_description: 99.9% of Prometheus requests return a good HTTP code -slo_target: 0.999 -backend: - class: Prometheus - method: good_bad_ratio - url: ${PROMETHEUS_URL} - # Basic auth example: - # headers: - # Content-Type: application/json - # Authorization: Basic b2s6cGFzcW== # username:password base64-encoded - measurement: - filter_good: prometheus_http_requests_total{handler="/metrics", code=~"2.."} +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: prom-metrics-availability + labels: + service_name: prom + feature_name: metrics + slo_name: availability +spec: + description: 99.9% of Prometheus requests return a good HTTP code + backend: prometheus + method: good_bad_ratio + exporters: + - prometheus + service_level_indicator: + filter_good: prometheus_http_requests_total{handler="/metrics", code=~"2.."} filter_valid: prometheus_http_requests_total{handler="/metrics"} # filter_bad: prometheus_http_requests_total{code=~"5..", handler="/metrics"} # alternative to filter_valid field -exporters: -- class: Prometheus - url: ${PROMETHEUS_PUSHGATEWAY_URL} + goal: 0.999 diff --git a/samples/prometheus/slo_prom_metrics_latency_distribution_cut.yaml b/samples/prometheus/slo_prom_metrics_latency_distribution_cut.yaml index b2b8da92..95ea93d2 100644 --- a/samples/prometheus/slo_prom_metrics_latency_distribution_cut.yaml +++ b/samples/prometheus/slo_prom_metrics_latency_distribution_cut.yaml @@ -1,29 +1,18 @@ -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: prom -feature_name: metrics -slo_description: 99.99% of Prometheus requests return in less than 250ms -slo_name: latency -slo_target: 0.9999 -backend: - class: Prometheus - url: ${PROMETHEUS_URL} - method: distribution_cut - measurement: - expression: http_request_duration_seconds_bucket{handler="/metrics", code=~"2.."} +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: prom-metrics-latency + labels: + service_name: prom + feature_name: metrics + slo_name: latency +spec: + description: 99.99% of Prometheus requests return in less than 250ms + backend: prometheus + method: distribution_cut + exporters: + - prometheus + service_level_indicator: + expression: http_request_duration_seconds_bucket{handler="/metrics", code=~"2.."} threshold_bucket: 0.25 # in seconds, corresponds to the `le` (less than) PromQL label -exporters: -- class: Prometheus - url: ${PROMETHEUS_PUSHGATEWAY_URL} + goal: 0.9999 diff --git a/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml b/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml index 5b384404..cc75d5b4 100644 --- a/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml +++ b/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml @@ -1,39 +1,25 @@ -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: prom -feature_name: metrics -slo_description: 99.99% of Prometheus requests return in less than 250ms -slo_name: latency -slo_target: 0.9999 -backend: - class: Prometheus - url: ${PROMETHEUS_URL} - method: query_sli - measurement: - expression: > - increase( - http_request_duration_seconds_bucket{handler="/metrics", code=~"2..",le="0.25"}[window] - ) - / ignoring (le) - increase( - http_request_duration_seconds_count{handler="/metrics", code=~"2.."}[window] - ) -exporters: - - class: Bigquery - project_id: rnm-shared-monitoring - dataset_id: slos - table_id: reports - - class: Stackdriver - project_id: rnm-shared-monitoring +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: prom-metrics-latency + labels: + service_name: prom + feature_name: metrics + slo_name: latency +spec: + description: 99.99% of Prometheus requests return in less than 250ms + backend: prometheus + method: query_sli + exporters: + - bigquery + - cloud_monitoring + service_level_indicator: + expression: > + increase( + http_request_duration_seconds_bucket{handler="/metrics", code=~"2..",le="0.25"}[window] + ) + / ignoring (le) + increase( + http_request_duration_seconds_count{handler="/metrics", code=~"2.."}[window] + ) + goal: 0.9999 diff --git a/samples/stackdriver/slo_gae_app_availability.yaml b/samples/stackdriver/slo_gae_app_availability.yaml deleted file mode 100644 index 9da3ab32..00000000 --- a/samples/stackdriver/slo_gae_app_availability.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2019 Google Inc. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: gae -feature_name: app -slo_description: Availability of App Engine app -slo_name: availability -slo_target: 0.95 -backend: - class: Stackdriver - method: good_bad_ratio - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_good: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_count" - resource.type="gae_app" - ( metric.labels.response_code = 429 OR - metric.labels.response_code = 200 OR - metric.labels.response_code = 201 OR - metric.labels.response_code = 202 OR - metric.labels.response_code = 203 OR - metric.labels.response_code = 204 OR - metric.labels.response_code = 205 OR - metric.labels.response_code = 206 OR - metric.labels.response_code = 207 OR - metric.labels.response_code = 208 OR - metric.labels.response_code = 226 OR - metric.labels.response_code = 304 ) - filter_valid: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_count" -exporters: -- class: Stackdriver - project_id: ${STACKDRIVER_HOST_PROJECT_ID} diff --git a/samples/stackdriver/slo_gae_app_latency.yaml b/samples/stackdriver/slo_gae_app_latency.yaml deleted file mode 100644 index fdbcfbed..00000000 --- a/samples/stackdriver/slo_gae_app_latency.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2019 Google Inc. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: gae -feature_name: app -slo_description: Latency of App Engine app requests < 724ms -slo_name: latency724ms -slo_target: 0.999 -backend: - class: Stackdriver - method: distribution_cut - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_valid: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_latencies" - resource.type="gae_app" - metric.labels.response_code >= 200 - metric.labels.response_code < 500 - good_below_threshold: true - threshold_bucket: 19 -exporters: -- class: Stackdriver - project_id: ${STACKDRIVER_HOST_PROJECT_ID} diff --git a/samples/stackdriver/slo_lb_request_availability.yaml b/samples/stackdriver/slo_lb_request_availability.yaml deleted file mode 100644 index 000d48c2..00000000 --- a/samples/stackdriver/slo_lb_request_availability.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: lb -feature_name: request -slo_description: Availability of HTTP Load Balancer -slo_name: availability -slo_target: 0.98 -backend: - class: Stackdriver - method: good_bad_ratio - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_good: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/request_count" - resource.type="https_lb_rule" - ( metric.label.response_code_class="200" OR - metric.label.response_code_class="300" OR - metric.label.response_code_class="400" ) - filter_valid: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/request_count" - resource.type="https_lb_rule" -exporters: -- class: Stackdriver - project_id: ${STACKDRIVER_HOST_PROJECT_ID} diff --git a/samples/stackdriver/slo_lb_request_latency.yaml b/samples/stackdriver/slo_lb_request_latency.yaml deleted file mode 100644 index 1216825b..00000000 --- a/samples/stackdriver/slo_lb_request_latency.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: lb -feature_name: request -slo_description: Latency of HTTP Load Balancer < 724ms -slo_name: latency724ms -slo_target: 0.98 -backend: - class: Stackdriver - method: distribution_cut - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_valid: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/total_latencies" - resource.type="https_lb_rule" - ( metric.label.response_code_class="200" OR - metric.label.response_code_class="300" OR - metric.label.response_code_class="400" ) - good_below_threshold: true - threshold_bucket: 19 -exporters: -- class: Stackdriver - project_id: ${STACKDRIVER_HOST_PROJECT_ID} diff --git a/samples/stackdriver/slo_pubsub_subscription_throughput.yaml b/samples/stackdriver/slo_pubsub_subscription_throughput.yaml deleted file mode 100644 index 58ab1943..00000000 --- a/samples/stackdriver/slo_pubsub_subscription_throughput.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: pubsub -feature_name: subscription -slo_description: Throughput of Pub/Sub subscription -slo_name: throughput -slo_target: 0.95 -backend: - class: Stackdriver - project_id: "${STACKDRIVER_HOST_PROJECT_ID}" - method: good_bad_ratio - measurement: - filter_good: > - project="${PUBSUB_PROJECT_ID}" - metric.type="pubsub.googleapis.com/subscription/ack_message_count" - resource.type="pubsub_subscription" - filter_bad: > - project="${PUBSUB_PROJECT_ID}" - metric.type="pubsub.googleapis.com/subscription/num_outstanding_messages" - resource.type="pubsub_subscription" -exporters: -- class: Stackdriver - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - -- class: Pubsub - project_id: ${PUBSUB_PROJECT_ID} - topic_name: ${PUBSUB_TOPIC_NAME} diff --git a/samples/stackdriver_service_monitoring/slo_gae_app_availability.yaml b/samples/stackdriver_service_monitoring/slo_gae_app_availability.yaml deleted file mode 100644 index 8ee66be7..00000000 --- a/samples/stackdriver_service_monitoring/slo_gae_app_availability.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gae -feature_name: app -slo_description: Availability of App Engine app -slo_name: availability -slo_target: 0.95 -backend: - class: StackdriverServiceMonitoring - method: good_bad_ratio - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_good: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_count" - resource.type="gae_app" - ( metric.labels.response_code = 429 OR - metric.labels.response_code = 200 OR - metric.labels.response_code = 201 OR - metric.labels.response_code = 202 OR - metric.labels.response_code = 203 OR - metric.labels.response_code = 204 OR - metric.labels.response_code = 205 OR - metric.labels.response_code = 206 OR - metric.labels.response_code = 207 OR - metric.labels.response_code = 208 OR - metric.labels.response_code = 226 OR - metric.labels.response_code = 304 ) - filter_valid: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_count" - resource.type="gae_app" diff --git a/samples/stackdriver_service_monitoring/slo_gae_app_availability_basic.yaml b/samples/stackdriver_service_monitoring/slo_gae_app_availability_basic.yaml deleted file mode 100644 index 2a43c4eb..00000000 --- a/samples/stackdriver_service_monitoring/slo_gae_app_availability_basic.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gae -feature_name: app -slo_description: Availability of App Engine app -slo_name: availability -slo_target: 0.98 -backend: - class: StackdriverServiceMonitoring - method: basic - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - app_engine: - project_id: ${GAE_PROJECT_ID} - module_id: ${GAE_MODULE_ID} - availability: {} diff --git a/samples/stackdriver_service_monitoring/slo_gae_app_latency.yaml b/samples/stackdriver_service_monitoring/slo_gae_app_latency.yaml deleted file mode 100644 index 777e386a..00000000 --- a/samples/stackdriver_service_monitoring/slo_gae_app_latency.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gae -feature_name: app -slo_description: Latency of App Engine app requests < 724ms -slo_name: latency724ms -slo_target: 0.999 -backend: - class: StackdriverServiceMonitoring - method: distribution_cut - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_valid: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_latencies" - resource.type="gae_app" - metric.labels.response_code >= 200 - metric.labels.response_code < 500 - range_min: 0 - range_max: 724 diff --git a/samples/stackdriver_service_monitoring/slo_gae_app_latency_basic.yaml b/samples/stackdriver_service_monitoring/slo_gae_app_latency_basic.yaml deleted file mode 100644 index 3d91dffb..00000000 --- a/samples/stackdriver_service_monitoring/slo_gae_app_latency_basic.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gae -feature_name: app -slo_description: Latency of App Engine app requests < 724ms -slo_name: latency724ms -slo_target: 0.999 -backend: - class: StackdriverServiceMonitoring - method: basic - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - app_engine: - project_id: ${GAE_PROJECT_ID} - module_id: ${GAE_MODULE_ID} - latency: - threshold: 724 # ms diff --git a/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic.yaml b/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic.yaml deleted file mode 100644 index ad4dd6ee..00000000 --- a/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gke -feature_name: service -slo_description: Availability of GKE service -slo_name: availability -slo_target: 0.98 -backend: - class: StackdriverServiceMonitoring - method: basic - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - mesh_istio: - mesh_uid: ${GKE_MESH_UID} - service_namespace: ${GKE_SERVICE_NAMESPACE} - service_name: ${GKE_SERVICE_NAME} - availability: {} diff --git a/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml b/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml deleted file mode 100644 index b4ab973f..00000000 --- a/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gke -feature_name: service -slo_description: Availability of GKE service -slo_name: availability -slo_target: 0.98 -backend: - class: StackdriverServiceMonitoring - method: basic - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - cluster_istio: - project_id: ${GKE_PROJECT_ID} - location: ${GKE_LOCATION} - cluster_name: ${GKE_CLUSTER_NAME} - service_namespace: ${GKE_SERVICE_NAMESPACE} - service_name: ${GKE_SERVICE_NAME} - availability: {} diff --git a/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic.yaml b/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic.yaml deleted file mode 100644 index c2de6af2..00000000 --- a/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gke -feature_name: service -slo_description: Latency of GKE service requests < 724ms -slo_name: latency724ms -slo_target: 0.999 -backend: - class: StackdriverServiceMonitoring - method: basic - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - mesh_istio: - mesh_uid: ${GKE_MESH_UID} - service_namespace: ${GKE_SERVICE_NAMESPACE} - service_name: ${GKE_SERVICE_NAME} - latency: - threshold: 724 # ms diff --git a/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml b/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml deleted file mode 100644 index e7643909..00000000 --- a/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gke -feature_name: service -slo_description: Latency of GKE service requests < 724ms -slo_name: latency724ms -slo_target: 0.999 -backend: - class: StackdriverServiceMonitoring - method: basic - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - cluster_istio: - project_id: ${GKE_PROJECT_ID} - location: ${GKE_LOCATION} - cluster_name: ${GKE_CLUSTER_NAME} - service_namespace: ${GKE_SERVICE_NAMESPACE} - service_name: ${GKE_SERVICE_NAME} - latency: - threshold: 724 # ms diff --git a/samples/stackdriver_service_monitoring/slo_lb_request_availability.yaml b/samples/stackdriver_service_monitoring/slo_lb_request_availability.yaml deleted file mode 100644 index 428d411b..00000000 --- a/samples/stackdriver_service_monitoring/slo_lb_request_availability.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: lb -feature_name: request -slo_description: Availability of HTTP Load Balancer -slo_name: availability -slo_target: 0.98 -backend: - class: StackdriverServiceMonitoring - method: good_bad_ratio - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_good: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/request_count" - resource.type="https_lb_rule" - ( metric.label.response_code_class="200" OR - metric.label.response_code_class="300" OR - metric.label.response_code_class="400" ) - filter_valid: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/request_count" - resource.type="https_lb_rule" diff --git a/samples/stackdriver_service_monitoring/slo_lb_request_latency.yaml b/samples/stackdriver_service_monitoring/slo_lb_request_latency.yaml deleted file mode 100644 index 3e19d8e6..00000000 --- a/samples/stackdriver_service_monitoring/slo_lb_request_latency.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: lb -feature_name: request -slo_description: Latency of HTTP Load Balancer < 724ms -slo_name: latency724ms -slo_target: 0.98 -backend: - class: StackdriverServiceMonitoring - method: distribution_cut - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_valid: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/total_latencies" - resource.type="https_lb_rule" - ( metric.label.response_code_class="200" OR - metric.label.response_code_class="300" OR - metric.label.response_code_class="400" ) - range_min: 0 - range_max: 724 # ms diff --git a/samples/stackdriver_service_monitoring/zzz_slo_pubsub_subscription_throughput.yaml b/samples/stackdriver_service_monitoring/zzz_slo_pubsub_subscription_throughput.yaml deleted file mode 100644 index cb678d3b..00000000 --- a/samples/stackdriver_service_monitoring/zzz_slo_pubsub_subscription_throughput.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# TODO: Doesn't work at the moment because Stackdriver Service Monitoring API -# does not support Gauge-type metrics. 
- -# --- -# service_name: pubsub -# feature_name: subscription -# slo_description: Throughput of Pub/Sub subscription -# slo_name: throughput -# slo_target: 0.95 -# backend: -# class: StackdriverServiceMonitoring -# project_id: "${STACKDRIVER_HOST_PROJECT_ID}" -# method: good_bad_ratio -# measurement: -# filter_good: > -# project="${PUBSUB_PROJECT_ID}" -# metric.type="pubsub.googleapis.com/subscription/ack_message_count" -# resource.type="pubsub_subscription" -# filter_bad: > -# project="${PUBSUB_PROJECT_ID}" -# metric.type="pubsub.googleapis.com/subscription/num_outstanding_messages" -# resource.type="pubsub_subscription" From 4a1564fbcd62b17ccf1889c8918cb4473f704dc9 Mon Sep 17 00:00:00 2001 From: Olivier Cervello Date: Mon, 31 May 2021 13:46:49 +0200 Subject: [PATCH 2/8] Restore pre-migration state --- .../slo_gae_app_availability.yaml | 35 -------- .../cloud_monitoring/slo_gae_app_latency.yaml | 24 ------ .../slo_lb_request_availability.yaml | 27 ------ .../slo_lb_request_latency.yaml | 25 ------ .../slo_pubsub_subscription_throughput.yaml | 25 ------ .../slo_gae_app_availability.yaml | 36 -------- .../slo_gae_app_availability_basic.yaml | 20 ----- .../slo_gae_app_latency.yaml | 24 ------ .../slo_gae_app_latency_basic.yaml | 21 ----- .../slo_gke_app_availability_basic.yaml | 21 ----- ...gke_app_availability_basic_deprecated.yaml | 23 ----- .../slo_gke_app_latency_basic.yaml | 22 ----- .../slo_gke_app_latency_basic_deprecated.yaml | 24 ------ .../slo_lb_request_availability.yaml | 27 ------ .../slo_lb_request_latency.yaml | 25 ------ samples/config.yaml | 85 ------------------- ...slo_custom_app_availability_query_sli.yaml | 41 +++++---- .../slo_custom_app_availability_ratio.yaml | 41 +++++---- .../slo_dd_app_availability_query_sli.yaml | 49 +++++++---- .../slo_dd_app_availability_query_slo.yaml | 43 ++++++---- .../slo_dd_app_availability_ratio.yaml | 50 +++++++---- .../slo_dt_app_availability_ratio.yaml | 32 +++---- .../slo_dt_app_latency_threshold.yaml | 51 
++++++----- samples/elasticsearch/slo_elk_test_ratio.yaml | 60 ++++++++----- samples/error_budget_policy.yaml | 43 ++++++++++ samples/error_budget_policy_ssm.yaml | 27 ++++++ ...o_prom_metrics_availability_query_sli.yaml | 49 +++++++---- .../slo_prom_metrics_availability_ratio.yaml | 49 +++++++---- ...prom_metrics_latency_distribution_cut.yaml | 45 ++++++---- .../slo_prom_metrics_latency_query_sli.yaml | 64 ++++++++------ .../stackdriver/slo_gae_app_availability.yaml | 46 ++++++++++ samples/stackdriver/slo_gae_app_latency.yaml | 35 ++++++++ .../slo_lb_request_availability.yaml | 38 +++++++++ .../stackdriver/slo_lb_request_latency.yaml | 36 ++++++++ .../slo_pubsub_subscription_throughput.yaml | 39 +++++++++ .../slo_gae_app_availability.yaml | 44 ++++++++++ .../slo_gae_app_availability_basic.yaml | 28 ++++++ .../slo_gae_app_latency.yaml | 32 +++++++ .../slo_gae_app_latency_basic.yaml | 29 +++++++ .../slo_gke_app_availability_basic.yaml | 29 +++++++ ...gke_app_availability_basic_deprecated.yaml | 31 +++++++ .../slo_gke_app_latency_basic.yaml | 30 +++++++ .../slo_gke_app_latency_basic_deprecated.yaml | 32 +++++++ .../slo_lb_request_availability.yaml | 35 ++++++++ .../slo_lb_request_latency.yaml | 33 +++++++ ...zz_slo_pubsub_subscription_throughput.yaml | 35 ++++++++ 46 files changed, 978 insertions(+), 682 deletions(-) delete mode 100644 samples/cloud_monitoring/slo_gae_app_availability.yaml delete mode 100644 samples/cloud_monitoring/slo_gae_app_latency.yaml delete mode 100644 samples/cloud_monitoring/slo_lb_request_availability.yaml delete mode 100644 samples/cloud_monitoring/slo_lb_request_latency.yaml delete mode 100644 samples/cloud_monitoring/slo_pubsub_subscription_throughput.yaml delete mode 100644 samples/cloud_service_monitoring/slo_gae_app_availability.yaml delete mode 100644 samples/cloud_service_monitoring/slo_gae_app_availability_basic.yaml delete mode 100644 samples/cloud_service_monitoring/slo_gae_app_latency.yaml delete mode 100644 
samples/cloud_service_monitoring/slo_gae_app_latency_basic.yaml delete mode 100644 samples/cloud_service_monitoring/slo_gke_app_availability_basic.yaml delete mode 100644 samples/cloud_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml delete mode 100644 samples/cloud_service_monitoring/slo_gke_app_latency_basic.yaml delete mode 100644 samples/cloud_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml delete mode 100644 samples/cloud_service_monitoring/slo_lb_request_availability.yaml delete mode 100644 samples/cloud_service_monitoring/slo_lb_request_latency.yaml delete mode 100644 samples/config.yaml create mode 100644 samples/error_budget_policy.yaml create mode 100644 samples/error_budget_policy_ssm.yaml create mode 100644 samples/stackdriver/slo_gae_app_availability.yaml create mode 100644 samples/stackdriver/slo_gae_app_latency.yaml create mode 100644 samples/stackdriver/slo_lb_request_availability.yaml create mode 100644 samples/stackdriver/slo_lb_request_latency.yaml create mode 100644 samples/stackdriver/slo_pubsub_subscription_throughput.yaml create mode 100644 samples/stackdriver_service_monitoring/slo_gae_app_availability.yaml create mode 100644 samples/stackdriver_service_monitoring/slo_gae_app_availability_basic.yaml create mode 100644 samples/stackdriver_service_monitoring/slo_gae_app_latency.yaml create mode 100644 samples/stackdriver_service_monitoring/slo_gae_app_latency_basic.yaml create mode 100644 samples/stackdriver_service_monitoring/slo_gke_app_availability_basic.yaml create mode 100644 samples/stackdriver_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml create mode 100644 samples/stackdriver_service_monitoring/slo_gke_app_latency_basic.yaml create mode 100644 samples/stackdriver_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml create mode 100644 samples/stackdriver_service_monitoring/slo_lb_request_availability.yaml create mode 100644 
samples/stackdriver_service_monitoring/slo_lb_request_latency.yaml create mode 100644 samples/stackdriver_service_monitoring/zzz_slo_pubsub_subscription_throughput.yaml diff --git a/samples/cloud_monitoring/slo_gae_app_availability.yaml b/samples/cloud_monitoring/slo_gae_app_availability.yaml deleted file mode 100644 index 675f290f..00000000 --- a/samples/cloud_monitoring/slo_gae_app_availability.yaml +++ /dev/null @@ -1,35 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: gae-app-availability - labels: - service_name: gae - feature_name: app - slo_name: availability -spec: - description: Availability of App Engine app - backend: cloud_monitoring - method: good_bad_ratio - exporters: - - cloud_monitoring - service_level_indicator: - filter_good: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_count" - resource.type="gae_app" - ( metric.labels.response_code = 429 OR - metric.labels.response_code = 200 OR - metric.labels.response_code = 201 OR - metric.labels.response_code = 202 OR - metric.labels.response_code = 203 OR - metric.labels.response_code = 204 OR - metric.labels.response_code = 205 OR - metric.labels.response_code = 206 OR - metric.labels.response_code = 207 OR - metric.labels.response_code = 208 OR - metric.labels.response_code = 226 OR - metric.labels.response_code = 304 ) - filter_valid: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_count" - goal: 0.95 diff --git a/samples/cloud_monitoring/slo_gae_app_latency.yaml b/samples/cloud_monitoring/slo_gae_app_latency.yaml deleted file mode 100644 index 3f0bf38d..00000000 --- a/samples/cloud_monitoring/slo_gae_app_latency.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: gae-app-latency724ms - labels: - service_name: gae - feature_name: app - slo_name: latency724ms -spec: - description: Latency of App Engine app requests < 
724ms - backend: cloud_monitoring - method: distribution_cut - exporters: - - cloud_monitoring - service_level_indicator: - filter_valid: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_latencies" - resource.type="gae_app" - metric.labels.response_code >= 200 - metric.labels.response_code < 500 - good_below_threshold: true - threshold_bucket: 19 - goal: 0.999 diff --git a/samples/cloud_monitoring/slo_lb_request_availability.yaml b/samples/cloud_monitoring/slo_lb_request_availability.yaml deleted file mode 100644 index 0a9b6f57..00000000 --- a/samples/cloud_monitoring/slo_lb_request_availability.yaml +++ /dev/null @@ -1,27 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: lb-request-availability - labels: - service_name: lb - feature_name: request - slo_name: availability -spec: - description: Availability of HTTP Load Balancer - backend: cloud_monitoring - method: good_bad_ratio - exporters: - - cloud_monitoring - service_level_indicator: - filter_good: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/request_count" - resource.type="https_lb_rule" - ( metric.label.response_code_class="200" OR - metric.label.response_code_class="300" OR - metric.label.response_code_class="400" ) - filter_valid: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/request_count" - resource.type="https_lb_rule" - goal: 0.98 diff --git a/samples/cloud_monitoring/slo_lb_request_latency.yaml b/samples/cloud_monitoring/slo_lb_request_latency.yaml deleted file mode 100644 index 56df0de2..00000000 --- a/samples/cloud_monitoring/slo_lb_request_latency.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: lb-request-latency724ms - labels: - service_name: lb - feature_name: request - slo_name: latency724ms -spec: - description: Latency of HTTP Load Balancer < 724ms - backend: cloud_monitoring - 
method: distribution_cut - exporters: - - cloud_monitoring - service_level_indicator: - filter_valid: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/total_latencies" - resource.type="https_lb_rule" - ( metric.label.response_code_class="200" OR - metric.label.response_code_class="300" OR - metric.label.response_code_class="400" ) - good_below_threshold: true - threshold_bucket: 19 - goal: 0.98 diff --git a/samples/cloud_monitoring/slo_pubsub_subscription_throughput.yaml b/samples/cloud_monitoring/slo_pubsub_subscription_throughput.yaml deleted file mode 100644 index 1e2721c5..00000000 --- a/samples/cloud_monitoring/slo_pubsub_subscription_throughput.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: pubsub-subscription-throughput - labels: - service_name: pubsub - feature_name: subscription - slo_name: throughput -spec: - description: Throughput of Pub/Sub subscription - backend: cloud_monitoring - method: good_bad_ratio - exporters: - - cloud_monitoring - - pubsub - service_level_indicator: - filter_good: > - project="${PUBSUB_PROJECT_ID}" - metric.type="pubsub.googleapis.com/subscription/ack_message_count" - resource.type="pubsub_subscription" - filter_bad: > - project="${PUBSUB_PROJECT_ID}" - metric.type="pubsub.googleapis.com/subscription/num_outstanding_messages" - resource.type="pubsub_subscription" - goal: 0.95 diff --git a/samples/cloud_service_monitoring/slo_gae_app_availability.yaml b/samples/cloud_service_monitoring/slo_gae_app_availability.yaml deleted file mode 100644 index a2e97de3..00000000 --- a/samples/cloud_service_monitoring/slo_gae_app_availability.yaml +++ /dev/null @@ -1,36 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: gae-app-availability - labels: - service_name: gae - feature_name: app - slo_name: availability -spec: - description: Availability of App Engine app - error_budget_policy: 
cloud_service_monitoring - backend: cloud_service_monitoring - method: good_bad_ratio - exporters: [] - service_level_indicator: - filter_good: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_count" - resource.type="gae_app" - ( metric.labels.response_code = 429 OR - metric.labels.response_code = 200 OR - metric.labels.response_code = 201 OR - metric.labels.response_code = 202 OR - metric.labels.response_code = 203 OR - metric.labels.response_code = 204 OR - metric.labels.response_code = 205 OR - metric.labels.response_code = 206 OR - metric.labels.response_code = 207 OR - metric.labels.response_code = 208 OR - metric.labels.response_code = 226 OR - metric.labels.response_code = 304 ) - filter_valid: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_count" - resource.type="gae_app" - goal: 0.95 diff --git a/samples/cloud_service_monitoring/slo_gae_app_availability_basic.yaml b/samples/cloud_service_monitoring/slo_gae_app_availability_basic.yaml deleted file mode 100644 index dd6507ff..00000000 --- a/samples/cloud_service_monitoring/slo_gae_app_availability_basic.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: gae-app-availability - labels: - service_name: gae - feature_name: app - slo_name: availability -spec: - description: Availability of App Engine app - error_budget_policy: cloud_service_monitoring - backend: cloud_service_monitoring - method: basic - exporters: [] - service_level_indicator: - app_engine: - project_id: ${GAE_PROJECT_ID} - module_id: ${GAE_MODULE_ID} - availability: {} - goal: 0.98 diff --git a/samples/cloud_service_monitoring/slo_gae_app_latency.yaml b/samples/cloud_service_monitoring/slo_gae_app_latency.yaml deleted file mode 100644 index c949fb88..00000000 --- a/samples/cloud_service_monitoring/slo_gae_app_latency.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: 
ServiceLevelObjective -metadata: - name: gae-app-latency724ms - labels: - service_name: gae - feature_name: app - slo_name: latency724ms -spec: - description: Latency of App Engine app requests < 724ms - error_budget_policy: cloud_service_monitoring - backend: cloud_service_monitoring - method: distribution_cut - exporters: [] - service_level_indicator: - filter_valid: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_latencies" - resource.type="gae_app" - metric.labels.response_code >= 200 - metric.labels.response_code < 500 - range_min: 0 - range_max: 724 - goal: 0.999 diff --git a/samples/cloud_service_monitoring/slo_gae_app_latency_basic.yaml b/samples/cloud_service_monitoring/slo_gae_app_latency_basic.yaml deleted file mode 100644 index e648e99d..00000000 --- a/samples/cloud_service_monitoring/slo_gae_app_latency_basic.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: gae-app-latency724ms - labels: - service_name: gae - feature_name: app - slo_name: latency724ms -spec: - description: Latency of App Engine app requests < 724ms - error_budget_policy: cloud_service_monitoring - backend: cloud_service_monitoring - method: basic - exporters: [] - service_level_indicator: - app_engine: - project_id: ${GAE_PROJECT_ID} - module_id: ${GAE_MODULE_ID} - latency: - threshold: 724 # ms - goal: 0.999 diff --git a/samples/cloud_service_monitoring/slo_gke_app_availability_basic.yaml b/samples/cloud_service_monitoring/slo_gke_app_availability_basic.yaml deleted file mode 100644 index 74c32976..00000000 --- a/samples/cloud_service_monitoring/slo_gke_app_availability_basic.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# apiVersion: sre.google.com/v2 -# kind: ServiceLevelObjective -# metadata: -# name: gke-service-availability -# labels: -# service_name: gke -# feature_name: service -# slo_name: availability -# spec: -# description: Availability of GKE service -# error_budget_policy: 
cloud_service_monitoring -# backend: cloud_service_monitoring -# method: basic -# exporters: [] -# service_level_indicator: -# mesh_istio: -# mesh_uid: ${GKE_MESH_UID} -# service_namespace: ${GKE_SERVICE_NAMESPACE} -# service_name: ${GKE_SERVICE_NAME} -# availability: {} -# goal: 0.98 diff --git a/samples/cloud_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml b/samples/cloud_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml deleted file mode 100644 index ffa2c56e..00000000 --- a/samples/cloud_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: gke-service-availability - labels: - service_name: gke - feature_name: service - slo_name: availability -spec: - description: Availability of GKE service - error_budget_policy: cloud_service_monitoring - backend: cloud_service_monitoring - method: basic - exporters: [] - service_level_indicator: - cluster_istio: - project_id: ${GKE_PROJECT_ID} - location: ${GKE_LOCATION} - cluster_name: ${GKE_CLUSTER_NAME} - service_namespace: ${GKE_SERVICE_NAMESPACE} - service_name: ${GKE_SERVICE_NAME} - availability: {} - goal: 0.98 diff --git a/samples/cloud_service_monitoring/slo_gke_app_latency_basic.yaml b/samples/cloud_service_monitoring/slo_gke_app_latency_basic.yaml deleted file mode 100644 index b0152afb..00000000 --- a/samples/cloud_service_monitoring/slo_gke_app_latency_basic.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# apiVersion: sre.google.com/v2 -# kind: ServiceLevelObjective -# metadata: -# name: gke-service-latency724ms -# labels: -# service_name: gke -# feature_name: service -# slo_name: latency724ms -# spec: -# description: Latency of GKE service requests < 724ms -# error_budget_policy: cloud_service_monitoring -# backend: cloud_service_monitoring -# method: basic -# exporters: [] -# service_level_indicator: -# mesh_istio: -# mesh_uid: ${GKE_MESH_UID} -# service_namespace: 
${GKE_SERVICE_NAMESPACE} -# service_name: ${GKE_SERVICE_NAME} -# latency: -# threshold: 724 # ms -# goal: 0.999 diff --git a/samples/cloud_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml b/samples/cloud_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml deleted file mode 100644 index 4c001c38..00000000 --- a/samples/cloud_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: gke-service-latency724ms - labels: - service_name: gke - feature_name: service - slo_name: latency724ms -spec: - description: Latency of GKE service requests < 724ms - error_budget_policy: cloud_service_monitoring - backend: cloud_service_monitoring - method: basic - exporters: [] - service_level_indicator: - cluster_istio: - project_id: ${GKE_PROJECT_ID} - location: ${GKE_LOCATION} - cluster_name: ${GKE_CLUSTER_NAME} - service_namespace: ${GKE_SERVICE_NAMESPACE} - service_name: ${GKE_SERVICE_NAME} - latency: - threshold: 724 # ms - goal: 0.999 diff --git a/samples/cloud_service_monitoring/slo_lb_request_availability.yaml b/samples/cloud_service_monitoring/slo_lb_request_availability.yaml deleted file mode 100644 index 6bc1df9c..00000000 --- a/samples/cloud_service_monitoring/slo_lb_request_availability.yaml +++ /dev/null @@ -1,27 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: lb-request-availability - labels: - service_name: lb - feature_name: request - slo_name: availability -spec: - description: Availability of HTTP Load Balancer - error_budget_policy: cloud_service_monitoring - backend: cloud_service_monitoring - method: good_bad_ratio - exporters: [] - service_level_indicator: - filter_good: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/request_count" - resource.type="https_lb_rule" - ( metric.label.response_code_class="200" OR - metric.label.response_code_class="300" OR - 
metric.label.response_code_class="400" ) - filter_valid: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/request_count" - resource.type="https_lb_rule" - goal: 0.98 diff --git a/samples/cloud_service_monitoring/slo_lb_request_latency.yaml b/samples/cloud_service_monitoring/slo_lb_request_latency.yaml deleted file mode 100644 index 2b326176..00000000 --- a/samples/cloud_service_monitoring/slo_lb_request_latency.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: lb-request-latency724ms - labels: - service_name: lb - feature_name: request - slo_name: latency724ms -spec: - description: Latency of HTTP Load Balancer < 724ms - error_budget_policy: cloud_service_monitoring - backend: cloud_service_monitoring - method: distribution_cut - exporters: [] - service_level_indicator: - filter_valid: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/total_latencies" - resource.type="https_lb_rule" - ( metric.label.response_code_class="200" OR - metric.label.response_code_class="300" OR - metric.label.response_code_class="400" ) - range_min: 0 - range_max: 724 # ms - goal: 0.98 diff --git a/samples/config.yaml b/samples/config.yaml deleted file mode 100644 index 58661ceb..00000000 --- a/samples/config.yaml +++ /dev/null @@ -1,85 +0,0 @@ ---- -backends: - cloud_monitoring: - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - cloud_service_monitoring: - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - custom.custom_backend.CustomBackend: {} - datadog: - api_key: ${DATADOG_API_KEY} - app_key: ${DATADOG_APP_KEY} - dynatrace: - api_url: ${DYNATRACE_API_URL} - api_token: ${DYNATRACE_API_TOKEN} - elasticsearch: - url: ${ELASTICSEARCH_URL} - prometheus: - url: ${PROMETHEUS_URL} - -exporters: - bigquery: - project_id: ${BIGQUERY_PROJECT_ID} - dataset_id: slos - table_id: reports - cloud_monitoring: - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - 
custom.custom_exporter.CustomMetricExporter: {} - datadog: - api_key: ${DATADOG_API_KEY} - app_key: ${DATADOG_APP_KEY} - dynatrace: - api_url: ${DYNATRACE_API_URL} - api_token: ${DYNATRACE_API_TOKEN} - prometheus: - url: ${PROMETHEUS_PUSHGATEWAY_URL} - pubsub: - project_id: ${PUBSUB_PROJECT_ID} - topic_name: ${PUBSUB_TOPIC_NAME} - -error_budget_policies: - default: - steps: - - name: 1 hour - burn_rate_threshold: 9 - alert: true - message_alert: Page to defend the SLO - message_ok: Last hour on track - window: 3600 - - name: 12 hours - burn_rate_threshold: 3 - alert: true - message_alert: Page to defend the SLO - message_ok: Last 12 hours on track - window: 43200 - - name: 7 days - burn_rate_threshold: 1.5 - alert: false - message_alert: Dev team dedicates 25% of engineers to the reliability backlog - message_ok: Last week on track - window: 604800 - - name: 28 days - burn_rate_threshold: 1 - alert: false - message_alert: Freeze release, unless related to reliability or security - message_ok: Unfreeze release, per the agreed roll-out policy - window: 2419200 - cloud_service_monitoring: - steps: - - name: 24 hours - burn_rate_threshold: 3 - alert: true - message_alert: Page to defend the SLO - message_ok: Last 24 hours on track - window: 86400 - - name: 7 days - burn_rate_threshold: 1.5 - alert: false - message_alert: Dev team dedicates 25% of engineers to the reliability backlog - message_ok: Last week on track - window: 604800 - - name: 28 days - burn_rate_threshold: 1 - alert: false - message_alert: Freeze release, unless related to reliability or security - message_ok: Unfreeze release, per the agreed roll-out policy - window: 2419200 diff --git a/samples/custom/slo_custom_app_availability_query_sli.yaml b/samples/custom/slo_custom_app_availability_query_sli.yaml index 858a439a..acfb64dd 100644 --- a/samples/custom/slo_custom_app_availability_query_sli.yaml +++ b/samples/custom/slo_custom_app_availability_query_sli.yaml @@ -1,16 +1,25 @@ -apiVersion: 
sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: custom-test-availability-sli - labels: - service_name: custom - feature_name: test - slo_name: availability-sli -spec: - description: 99.99% of fake requests to custom backends are valid - backend: custom.custom_backend.CustomBackend - method: query_sli - exporters: - - custom.custom_exporter.CustomMetricExporter - service_level_indicator: {} - goal: 0.999 +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: custom +feature_name: test +slo_description: 99.99% of fake requests to custom backends are valid +slo_name: availability-sli +slo_target: 0.999 +backend: + class: custom.custom_backend.CustomBackend + method: query_sli +exporters: +- class: custom.custom_exporter.CustomMetricExporter + class: custom.custom_exporter.CustomSLOExporter diff --git a/samples/custom/slo_custom_app_availability_ratio.yaml b/samples/custom/slo_custom_app_availability_ratio.yaml index dec63465..6ba841f5 100644 --- a/samples/custom/slo_custom_app_availability_ratio.yaml +++ b/samples/custom/slo_custom_app_availability_ratio.yaml @@ -1,16 +1,25 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: custom-test-availability-ratio - labels: - service_name: custom - feature_name: test - slo_name: availability-ratio -spec: - description: 99.99% of fake requests to custom backends are valid - backend: custom.custom_backend.CustomBackend - method: good_bad_ratio - exporters: - - custom.custom_exporter.CustomMetricExporter - service_level_indicator: {} - goal: 0.999 +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: custom +feature_name: test +slo_description: 99.99% of fake requests to custom backends are valid +slo_name: availability-ratio +slo_target: 0.999 +backend: + class: custom.custom_backend.CustomBackend + method: good_bad_ratio +exporters: +- class: custom.custom_exporter.CustomMetricExporter + class: custom.custom_exporter.CustomSLOExporter diff --git a/samples/datadog/slo_dd_app_availability_query_sli.yaml b/samples/datadog/slo_dd_app_availability_query_sli.yaml index 2fe9ed36..47ad78b5 100644 --- a/samples/datadog/slo_dd_app_availability_query_sli.yaml +++ b/samples/datadog/slo_dd_app_availability_query_sli.yaml @@ -1,18 +1,31 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: dd-app-availability - labels: - service_name: dd - feature_name: app - slo_name: availability -spec: - description: 99% of app requests return a valid HTTP code - backend: datadog - method: query_sli - exporters: - - datadog - service_level_indicator: - query: sum:app.requests.count{http.path:/, http.status_code_class:2xx}.as_count() - / sum:app.requests.count{http.path:/}.as_count() - goal: 0.99 +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: dd +feature_name: app +slo_name: availability +slo_description: 99% of app requests return a valid HTTP code +slo_target: 0.99 +backend: + class: Datadog + method: query_sli + api_key: ${DATADOG_API_KEY} + app_key: ${DATADOG_APP_KEY} + # api_host: api.datadoghq.eu # uncomment to use EU site + measurement: + query: sum:app.requests.count{http.path:/, http.status_code_class:2xx}.as_count() / sum:app.requests.count{http.path:/}.as_count() +exporters: + - class: Datadog + api_key: ${DATADOG_API_KEY} + app_key: ${DATADOG_APP_KEY} diff --git a/samples/datadog/slo_dd_app_availability_query_slo.yaml b/samples/datadog/slo_dd_app_availability_query_slo.yaml index 3465a64d..bc1c5b19 100644 --- a/samples/datadog/slo_dd_app_availability_query_slo.yaml +++ b/samples/datadog/slo_dd_app_availability_query_slo.yaml @@ -1,16 +1,27 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: dd-app-availability - labels: - service_name: dd - feature_name: app - slo_name: availability -spec: - description: 99% of app requests return a valid HTTP code - backend: datadog - method: query_slo - exporters: [] - service_level_indicator: - slo_id: ${DATADOG_SLO_ID} - goal: 0.99 +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: dd +feature_name: app +slo_name: availability +slo_description: 99% of app requests return a valid HTTP code +slo_target: 0.99 +backend: + class: Datadog + method: query_slo + api_key: ${DATADOG_API_KEY} + app_key: ${DATADOG_APP_KEY} + # api_host: api.datadoghq.eu # uncomment to use EU site + measurement: + slo_id: ${DATADOG_SLO_ID} diff --git a/samples/datadog/slo_dd_app_availability_ratio.yaml b/samples/datadog/slo_dd_app_availability_ratio.yaml index aa8be867..e524b23d 100644 --- a/samples/datadog/slo_dd_app_availability_ratio.yaml +++ b/samples/datadog/slo_dd_app_availability_ratio.yaml @@ -1,18 +1,32 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: dd-app-availability - labels: - service_name: dd - feature_name: app - slo_name: availability -spec: - description: 99% of app requests return a valid HTTP code - backend: datadog - method: good_bad_ratio - exporters: - - datadog - service_level_indicator: - query_good: app.requests.count{http.path:/, http.status_code_class:2xx} - query_valid: app.requests.count{http.path:/} - goal: 0.99 +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: dd +feature_name: app +slo_name: availability +slo_description: 99% of app requests return a valid HTTP code +slo_target: 0.99 +backend: + class: Datadog + method: good_bad_ratio + api_key: ${DATADOG_API_KEY} + app_key: ${DATADOG_APP_KEY} + # api_host: api.datadoghq.eu # uncomment to use EU site + measurement: + query_good: app.requests.count{http.path:/, http.status_code_class:2xx} + query_valid: app.requests.count{http.path:/} +exporters: + - class: Datadog + api_key: ${DATADOG_API_KEY} + app_key: ${DATADOG_APP_KEY} diff --git a/samples/dynatrace/slo_dt_app_availability_ratio.yaml b/samples/dynatrace/slo_dt_app_availability_ratio.yaml index af53cd27..ac705550 100644 --- a/samples/dynatrace/slo_dt_app_availability_ratio.yaml +++ b/samples/dynatrace/slo_dt_app_availability_ratio.yaml @@ -12,25 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. --- -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: dt-app-availability - labels: - service_name: dt - feature_name: app - slo_name: availability -spec: - description: 99.9% of app requests return a good HTTP code - backend: dynatrace - method: good_bad_ratio - exporters: - - dynatrace - service_level_indicator: +service_name: dt +feature_name: app +slo_name: availability +slo_description: 99.9% of app requests return a good HTTP code +slo_target: 0.999 +backend: + class: Dynatrace + method: good_bad_ratio + api_url: ${DYNATRACE_API_URL} + api_token: ${DYNATRACE_API_TOKEN} + measurement: query_good: metric_selector: ext:app.request_count:filter(and(eq(app,test_app),eq(env,prod),eq(status_code_class,2xx))) entity_selector: type(HOST) query_valid: metric_selector: ext:app.request_count:filter(and(eq(app,test_app),eq(env,prod))) entity_selector: type(HOST) - goal: 0.999 +exporters: +- class: Dynatrace + api_url: ${DYNATRACE_API_URL} + api_token: ${DYNATRACE_API_TOKEN} + metric_timeseries_id: 
custom:slo.error_budget_burn_rate diff --git a/samples/dynatrace/slo_dt_app_latency_threshold.yaml b/samples/dynatrace/slo_dt_app_latency_threshold.yaml index d12eda9a..bd97bb3d 100644 --- a/samples/dynatrace/slo_dt_app_latency_threshold.yaml +++ b/samples/dynatrace/slo_dt_app_latency_threshold.yaml @@ -1,20 +1,33 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: dt-app-latency - labels: - service_name: dt - feature_name: app - slo_name: latency -spec: - description: 99.9% of app 2xx requests return within 50ms - backend: dynatrace - method: threshold - exporters: - - dynatrace - service_level_indicator: +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: dt +feature_name: app +slo_name: latency +slo_description: 99.9% of app 2xx requests return within 50ms +slo_target: 0.999 +backend: + class: Dynatrace + method: threshold + api_url: ${DYNATRACE_API_URL} + api_token: ${DYNATRACE_API_TOKEN} + measurement: query_valid: - metric_selector: ext:app.request_latency:filter(and(eq(app,test_app),eq(env,prod),eq(status_code_class,2xx))) - entity_selector: type(HOST) - threshold: 50000 # us - goal: 0.999 + metric_selector: ext:app.request_latency:filter(and(eq(app,test_app),eq(env,prod),eq(status_code_class,2xx))) + entity_selector: type(HOST) + threshold: 50000 # us +exporters: +- class: Dynatrace + api_url: ${DYNATRACE_API_URL} + api_token: ${DYNATRACE_API_TOKEN} diff --git a/samples/elasticsearch/slo_elk_test_ratio.yaml b/samples/elasticsearch/slo_elk_test_ratio.yaml index 3c3be695..2114f902 100644 --- a/samples/elasticsearch/slo_elk_test_ratio.yaml +++ b/samples/elasticsearch/slo_elk_test_ratio.yaml @@ -1,27 +1,41 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: elk-test-errors - labels: - service_name: elk - feature_name: test - slo_name: errors -spec: - description: > - SLO for random test data generated with the - https://github.com/oliver006/elasticsearch-test-data - backend: elasticsearch - method: good_bad_ratio - exporters: - - pubsub - - cloud_monitoring - service_level_indicator: - index: test_data - date_field: last_updated - query_good: {} +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +--- +service_name: elk +feature_name: test +slo_description: > + SLO for random test data generated with the + https://github.com/oliver006/elasticsearch-test-data +slo_name: errors +slo_target: 1 +backend: + class: Elasticsearch + url: ${ELASTICSEARCH_URL} + method: good_bad_ratio + measurement: + index: test_data + date_field: last_updated + query_good: {} query_bad: must: term: - name: JAgOZE8 + name: JAgOZE8 - goal: 1 +exporters: +- class: Pubsub + project_id: ${PUBSUB_PROJECT_ID} + topic_name: ${PUBSUB_TOPIC_NAME} + +- class: Stackdriver + project_id: ${PUBSUB_PROJECT_ID} diff --git a/samples/error_budget_policy.yaml b/samples/error_budget_policy.yaml new file mode 100644 index 00000000..d491fc97 --- /dev/null +++ b/samples/error_budget_policy.yaml @@ -0,0 +1,43 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +- error_budget_policy_step_name: 1 hour + measurement_window_seconds: 3600 + alerting_burn_rate_threshold: 9 + urgent_notification: true + overburned_consequence_message: Page to defend the SLO + achieved_consequence_message: Last hour on track + +- error_budget_policy_step_name: 12 hours + measurement_window_seconds: 43200 + alerting_burn_rate_threshold: 3 + urgent_notification: true + overburned_consequence_message: Page to defend the SLO + achieved_consequence_message: Last 12 hours on track + +- error_budget_policy_step_name: 7 days + measurement_window_seconds: 604800 + alerting_burn_rate_threshold: 1.5 + urgent_notification: false + overburned_consequence_message: Dev team dedicates 25% of engineers to the + reliability backlog + achieved_consequence_message: Last week on track + +- error_budget_policy_step_name: 28 days + measurement_window_seconds: 2419200 + alerting_burn_rate_threshold: 1 + urgent_notification: false + overburned_consequence_message: Freeze release, unless related to reliability + or security + achieved_consequence_message: Unfreeze release, per the agreed roll-out policy diff --git a/samples/error_budget_policy_ssm.yaml b/samples/error_budget_policy_ssm.yaml new file mode 100644 index 00000000..9abeef1b --- /dev/null +++ b/samples/error_budget_policy_ssm.yaml @@ -0,0 +1,27 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +- error_budget_policy_step_name: 24 hours + measurement_window_seconds: 86400 + alerting_burn_rate_threshold: 4 + urgent_notification: true + overburned_consequence_message: Page to defend the SLO + achieved_consequence_message: Last 24 hours on track + +- error_budget_policy_step_name: 48 hours + measurement_window_seconds: 172800 + alerting_burn_rate_threshold: 2 + urgent_notification: true + overburned_consequence_message: Page to defend the SLO + achieved_consequence_message: Last 48 hours on track diff --git a/samples/prometheus/slo_prom_metrics_availability_query_sli.yaml b/samples/prometheus/slo_prom_metrics_availability_query_sli.yaml index 7f4f6718..05076a04 100644 --- a/samples/prometheus/slo_prom_metrics_availability_query_sli.yaml +++ b/samples/prometheus/slo_prom_metrics_availability_query_sli.yaml @@ -1,20 +1,35 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: prom-metrics-availability - labels: - service_name: prom - feature_name: metrics - slo_name: availability -spec: - description: 99.9% of Prometheus requests return a good HTTP code - backend: prometheus - method: query_sli - exporters: - - prometheus - service_level_indicator: - expression: > +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: prom +feature_name: metrics +slo_name: availability +slo_description: 99.9% of Prometheus requests return a good HTTP code +slo_target: 0.999 +backend: + class: Prometheus + method: query_sli + url: ${PROMETHEUS_URL} + # Basic auth example: + # headers: + # Content-Type: application/json + # Authorization: Basic b2s6cGFzcW== # username:password base64-encoded + measurement: + expression: > sum(rate(prometheus_http_requests_total{handler="/metrics", code=~"2.."}[window])) / sum(rate(prometheus_http_requests_total{handler="/metrics"}[window])) - goal: 0.999 +exporters: +- class: Prometheus + url: ${PROMETHEUS_PUSHGATEWAY_URL} diff --git a/samples/prometheus/slo_prom_metrics_availability_ratio.yaml b/samples/prometheus/slo_prom_metrics_availability_ratio.yaml index 7e4db0cf..23b5171e 100644 --- a/samples/prometheus/slo_prom_metrics_availability_ratio.yaml +++ b/samples/prometheus/slo_prom_metrics_availability_ratio.yaml @@ -1,19 +1,34 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: prom-metrics-availability - labels: - service_name: prom - feature_name: metrics - slo_name: availability -spec: - description: 99.9% of Prometheus requests return a good HTTP code - backend: prometheus - method: good_bad_ratio - exporters: - - prometheus - service_level_indicator: - filter_good: prometheus_http_requests_total{handler="/metrics", code=~"2.."} +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: prom +feature_name: metrics +slo_name: availability +slo_description: 99.9% of Prometheus requests return a good HTTP code +slo_target: 0.999 +backend: + class: Prometheus + method: good_bad_ratio + url: ${PROMETHEUS_URL} + # Basic auth example: + # headers: + # Content-Type: application/json + # Authorization: Basic b2s6cGFzcW== # username:password base64-encoded + measurement: + filter_good: prometheus_http_requests_total{handler="/metrics", code=~"2.."} filter_valid: prometheus_http_requests_total{handler="/metrics"} # filter_bad: prometheus_http_requests_total{code=~"5..", handler="/metrics"} # alternative to filter_valid field - goal: 0.999 +exporters: +- class: Prometheus + url: ${PROMETHEUS_PUSHGATEWAY_URL} diff --git a/samples/prometheus/slo_prom_metrics_latency_distribution_cut.yaml b/samples/prometheus/slo_prom_metrics_latency_distribution_cut.yaml index 95ea93d2..b2b8da92 100644 --- a/samples/prometheus/slo_prom_metrics_latency_distribution_cut.yaml +++ b/samples/prometheus/slo_prom_metrics_latency_distribution_cut.yaml @@ -1,18 +1,29 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: prom-metrics-latency - labels: - service_name: prom - feature_name: metrics - slo_name: latency -spec: - description: 99.99% of Prometheus requests return in less than 250ms - backend: prometheus - method: distribution_cut - exporters: - - prometheus - service_level_indicator: - expression: http_request_duration_seconds_bucket{handler="/metrics", code=~"2.."} +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +service_name: prom +feature_name: metrics +slo_description: 99.99% of Prometheus requests return in less than 250ms +slo_name: latency +slo_target: 0.9999 +backend: + class: Prometheus + url: ${PROMETHEUS_URL} + method: distribution_cut + measurement: + expression: http_request_duration_seconds_bucket{handler="/metrics", code=~"2.."} threshold_bucket: 0.25 # in seconds, corresponds to the `le` (less than) PromQL label - goal: 0.9999 +exporters: +- class: Prometheus + url: ${PROMETHEUS_PUSHGATEWAY_URL} diff --git a/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml b/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml index cc75d5b4..5b384404 100644 --- a/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml +++ b/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml @@ -1,25 +1,39 @@ -apiVersion: sre.google.com/v2 -kind: ServiceLevelObjective -metadata: - name: prom-metrics-latency - labels: - service_name: prom - feature_name: metrics - slo_name: latency -spec: - description: 99.99% of Prometheus requests return in less than 250ms - backend: prometheus - method: query_sli - exporters: - - bigquery - - cloud_monitoring - service_level_indicator: - expression: > - increase( - http_request_duration_seconds_bucket{handler="/metrics", code=~"2..",le="0.25"}[window] - ) - / ignoring (le) - increase( - http_request_duration_seconds_count{handler="/metrics", code=~"2.."}[window] - ) - goal: 0.9999 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +service_name: prom +feature_name: metrics +slo_description: 99.99% of Prometheus requests return in less than 250ms +slo_name: latency +slo_target: 0.9999 +backend: + class: Prometheus + url: ${PROMETHEUS_URL} + method: query_sli + measurement: + expression: > + increase( + http_request_duration_seconds_bucket{handler="/metrics", code=~"2..",le="0.25"}[window] + ) + / ignoring (le) + increase( + http_request_duration_seconds_count{handler="/metrics", code=~"2.."}[window] + ) +exporters: + - class: Bigquery + project_id: rnm-shared-monitoring + dataset_id: slos + table_id: reports + - class: Stackdriver + project_id: rnm-shared-monitoring diff --git a/samples/stackdriver/slo_gae_app_availability.yaml b/samples/stackdriver/slo_gae_app_availability.yaml new file mode 100644 index 00000000..9da3ab32 --- /dev/null +++ b/samples/stackdriver/slo_gae_app_availability.yaml @@ -0,0 +1,46 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: gae +feature_name: app +slo_description: Availability of App Engine app +slo_name: availability +slo_target: 0.95 +backend: + class: Stackdriver + method: good_bad_ratio + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + filter_good: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_count" + resource.type="gae_app" + ( metric.labels.response_code = 429 OR + metric.labels.response_code = 200 OR + metric.labels.response_code = 201 OR + metric.labels.response_code = 202 OR + metric.labels.response_code = 203 OR + metric.labels.response_code = 204 OR + metric.labels.response_code = 205 OR + metric.labels.response_code = 206 OR + metric.labels.response_code = 207 OR + metric.labels.response_code = 208 OR + metric.labels.response_code = 226 OR + metric.labels.response_code = 304 ) + filter_valid: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_count" +exporters: +- class: Stackdriver + project_id: ${STACKDRIVER_HOST_PROJECT_ID} diff --git a/samples/stackdriver/slo_gae_app_latency.yaml b/samples/stackdriver/slo_gae_app_latency.yaml new file mode 100644 index 00000000..fdbcfbed --- /dev/null +++ b/samples/stackdriver/slo_gae_app_latency.yaml @@ -0,0 +1,35 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: gae +feature_name: app +slo_description: Latency of App Engine app requests < 724ms +slo_name: latency724ms +slo_target: 0.999 +backend: + class: Stackdriver + method: distribution_cut + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + filter_valid: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_latencies" + resource.type="gae_app" + metric.labels.response_code >= 200 + metric.labels.response_code < 500 + good_below_threshold: true + threshold_bucket: 19 +exporters: +- class: Stackdriver + project_id: ${STACKDRIVER_HOST_PROJECT_ID} diff --git a/samples/stackdriver/slo_lb_request_availability.yaml b/samples/stackdriver/slo_lb_request_availability.yaml new file mode 100644 index 00000000..000d48c2 --- /dev/null +++ b/samples/stackdriver/slo_lb_request_availability.yaml @@ -0,0 +1,38 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: lb +feature_name: request +slo_description: Availability of HTTP Load Balancer +slo_name: availability +slo_target: 0.98 +backend: + class: Stackdriver + method: good_bad_ratio + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + filter_good: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/request_count" + resource.type="https_lb_rule" + ( metric.label.response_code_class="200" OR + metric.label.response_code_class="300" OR + metric.label.response_code_class="400" ) + filter_valid: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/request_count" + resource.type="https_lb_rule" +exporters: +- class: Stackdriver + project_id: ${STACKDRIVER_HOST_PROJECT_ID} diff --git a/samples/stackdriver/slo_lb_request_latency.yaml b/samples/stackdriver/slo_lb_request_latency.yaml new file mode 100644 index 00000000..1216825b --- /dev/null +++ b/samples/stackdriver/slo_lb_request_latency.yaml @@ -0,0 +1,36 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: lb +feature_name: request +slo_description: Latency of HTTP Load Balancer < 724ms +slo_name: latency724ms +slo_target: 0.98 +backend: + class: Stackdriver + method: distribution_cut + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + filter_valid: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/total_latencies" + resource.type="https_lb_rule" + ( metric.label.response_code_class="200" OR + metric.label.response_code_class="300" OR + metric.label.response_code_class="400" ) + good_below_threshold: true + threshold_bucket: 19 +exporters: +- class: Stackdriver + project_id: ${STACKDRIVER_HOST_PROJECT_ID} diff --git a/samples/stackdriver/slo_pubsub_subscription_throughput.yaml b/samples/stackdriver/slo_pubsub_subscription_throughput.yaml new file mode 100644 index 00000000..58ab1943 --- /dev/null +++ b/samples/stackdriver/slo_pubsub_subscription_throughput.yaml @@ -0,0 +1,39 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: pubsub +feature_name: subscription +slo_description: Throughput of Pub/Sub subscription +slo_name: throughput +slo_target: 0.95 +backend: + class: Stackdriver + project_id: "${STACKDRIVER_HOST_PROJECT_ID}" + method: good_bad_ratio + measurement: + filter_good: > + project="${PUBSUB_PROJECT_ID}" + metric.type="pubsub.googleapis.com/subscription/ack_message_count" + resource.type="pubsub_subscription" + filter_bad: > + project="${PUBSUB_PROJECT_ID}" + metric.type="pubsub.googleapis.com/subscription/num_outstanding_messages" + resource.type="pubsub_subscription" +exporters: +- class: Stackdriver + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + +- class: Pubsub + project_id: ${PUBSUB_PROJECT_ID} + topic_name: ${PUBSUB_TOPIC_NAME} diff --git a/samples/stackdriver_service_monitoring/slo_gae_app_availability.yaml b/samples/stackdriver_service_monitoring/slo_gae_app_availability.yaml new file mode 100644 index 00000000..8ee66be7 --- /dev/null +++ b/samples/stackdriver_service_monitoring/slo_gae_app_availability.yaml @@ -0,0 +1,44 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: gae +feature_name: app +slo_description: Availability of App Engine app +slo_name: availability +slo_target: 0.95 +backend: + class: StackdriverServiceMonitoring + method: good_bad_ratio + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + filter_good: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_count" + resource.type="gae_app" + ( metric.labels.response_code = 429 OR + metric.labels.response_code = 200 OR + metric.labels.response_code = 201 OR + metric.labels.response_code = 202 OR + metric.labels.response_code = 203 OR + metric.labels.response_code = 204 OR + metric.labels.response_code = 205 OR + metric.labels.response_code = 206 OR + metric.labels.response_code = 207 OR + metric.labels.response_code = 208 OR + metric.labels.response_code = 226 OR + metric.labels.response_code = 304 ) + filter_valid: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_count" + resource.type="gae_app" diff --git a/samples/stackdriver_service_monitoring/slo_gae_app_availability_basic.yaml b/samples/stackdriver_service_monitoring/slo_gae_app_availability_basic.yaml new file mode 100644 index 00000000..2a43c4eb --- /dev/null +++ b/samples/stackdriver_service_monitoring/slo_gae_app_availability_basic.yaml @@ -0,0 +1,28 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: gae +feature_name: app +slo_description: Availability of App Engine app +slo_name: availability +slo_target: 0.98 +backend: + class: StackdriverServiceMonitoring + method: basic + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + app_engine: + project_id: ${GAE_PROJECT_ID} + module_id: ${GAE_MODULE_ID} + availability: {} diff --git a/samples/stackdriver_service_monitoring/slo_gae_app_latency.yaml b/samples/stackdriver_service_monitoring/slo_gae_app_latency.yaml new file mode 100644 index 00000000..777e386a --- /dev/null +++ b/samples/stackdriver_service_monitoring/slo_gae_app_latency.yaml @@ -0,0 +1,32 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: gae +feature_name: app +slo_description: Latency of App Engine app requests < 724ms +slo_name: latency724ms +slo_target: 0.999 +backend: + class: StackdriverServiceMonitoring + method: distribution_cut + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + filter_valid: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_latencies" + resource.type="gae_app" + metric.labels.response_code >= 200 + metric.labels.response_code < 500 + range_min: 0 + range_max: 724 diff --git a/samples/stackdriver_service_monitoring/slo_gae_app_latency_basic.yaml b/samples/stackdriver_service_monitoring/slo_gae_app_latency_basic.yaml new file mode 100644 index 00000000..3d91dffb --- /dev/null +++ b/samples/stackdriver_service_monitoring/slo_gae_app_latency_basic.yaml @@ -0,0 +1,29 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: gae +feature_name: app +slo_description: Latency of App Engine app requests < 724ms +slo_name: latency724ms +slo_target: 0.999 +backend: + class: StackdriverServiceMonitoring + method: basic + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + app_engine: + project_id: ${GAE_PROJECT_ID} + module_id: ${GAE_MODULE_ID} + latency: + threshold: 724 # ms diff --git a/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic.yaml b/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic.yaml new file mode 100644 index 00000000..ad4dd6ee --- /dev/null +++ b/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic.yaml @@ -0,0 +1,29 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: gke +feature_name: service +slo_description: Availability of GKE service +slo_name: availability +slo_target: 0.98 +backend: + class: StackdriverServiceMonitoring + method: basic + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + mesh_istio: + mesh_uid: ${GKE_MESH_UID} + service_namespace: ${GKE_SERVICE_NAMESPACE} + service_name: ${GKE_SERVICE_NAME} + availability: {} diff --git a/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml b/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml new file mode 100644 index 00000000..b4ab973f --- /dev/null +++ b/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml @@ -0,0 +1,31 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: gke +feature_name: service +slo_description: Availability of GKE service +slo_name: availability +slo_target: 0.98 +backend: + class: StackdriverServiceMonitoring + method: basic + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + cluster_istio: + project_id: ${GKE_PROJECT_ID} + location: ${GKE_LOCATION} + cluster_name: ${GKE_CLUSTER_NAME} + service_namespace: ${GKE_SERVICE_NAMESPACE} + service_name: ${GKE_SERVICE_NAME} + availability: {} diff --git a/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic.yaml b/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic.yaml new file mode 100644 index 00000000..c2de6af2 --- /dev/null +++ b/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic.yaml @@ -0,0 +1,30 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: gke +feature_name: service +slo_description: Latency of GKE service requests < 724ms +slo_name: latency724ms +slo_target: 0.999 +backend: + class: StackdriverServiceMonitoring + method: basic + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + mesh_istio: + mesh_uid: ${GKE_MESH_UID} + service_namespace: ${GKE_SERVICE_NAMESPACE} + service_name: ${GKE_SERVICE_NAME} + latency: + threshold: 724 # ms diff --git a/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml b/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml new file mode 100644 index 00000000..e7643909 --- /dev/null +++ b/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml @@ -0,0 +1,32 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: gke +feature_name: service +slo_description: Latency of GKE service requests < 724ms +slo_name: latency724ms +slo_target: 0.999 +backend: + class: StackdriverServiceMonitoring + method: basic + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + cluster_istio: + project_id: ${GKE_PROJECT_ID} + location: ${GKE_LOCATION} + cluster_name: ${GKE_CLUSTER_NAME} + service_namespace: ${GKE_SERVICE_NAMESPACE} + service_name: ${GKE_SERVICE_NAME} + latency: + threshold: 724 # ms diff --git a/samples/stackdriver_service_monitoring/slo_lb_request_availability.yaml b/samples/stackdriver_service_monitoring/slo_lb_request_availability.yaml new file mode 100644 index 00000000..428d411b --- /dev/null +++ b/samples/stackdriver_service_monitoring/slo_lb_request_availability.yaml @@ -0,0 +1,35 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: lb +feature_name: request +slo_description: Availability of HTTP Load Balancer +slo_name: availability +slo_target: 0.98 +backend: + class: StackdriverServiceMonitoring + method: good_bad_ratio + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + filter_good: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/request_count" + resource.type="https_lb_rule" + ( metric.label.response_code_class="200" OR + metric.label.response_code_class="300" OR + metric.label.response_code_class="400" ) + filter_valid: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/request_count" + resource.type="https_lb_rule" diff --git a/samples/stackdriver_service_monitoring/slo_lb_request_latency.yaml b/samples/stackdriver_service_monitoring/slo_lb_request_latency.yaml new file mode 100644 index 00000000..3e19d8e6 --- /dev/null +++ b/samples/stackdriver_service_monitoring/slo_lb_request_latency.yaml @@ -0,0 +1,33 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +service_name: lb +feature_name: request +slo_description: Latency of HTTP Load Balancer < 724ms +slo_name: latency724ms +slo_target: 0.98 +backend: + class: StackdriverServiceMonitoring + method: distribution_cut + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + measurement: + filter_valid: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/total_latencies" + resource.type="https_lb_rule" + ( metric.label.response_code_class="200" OR + metric.label.response_code_class="300" OR + metric.label.response_code_class="400" ) + range_min: 0 + range_max: 724 # ms diff --git a/samples/stackdriver_service_monitoring/zzz_slo_pubsub_subscription_throughput.yaml b/samples/stackdriver_service_monitoring/zzz_slo_pubsub_subscription_throughput.yaml new file mode 100644 index 00000000..cb678d3b --- /dev/null +++ b/samples/stackdriver_service_monitoring/zzz_slo_pubsub_subscription_throughput.yaml @@ -0,0 +1,35 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# TODO: Doesn't work at the moment because Stackdriver Service Monitoring API +# does not support Gauge-type metrics. 
+ +# --- +# service_name: pubsub +# feature_name: subscription +# slo_description: Throughput of Pub/Sub subscription +# slo_name: throughput +# slo_target: 0.95 +# backend: +# class: StackdriverServiceMonitoring +# project_id: "${STACKDRIVER_HOST_PROJECT_ID}" +# method: good_bad_ratio +# measurement: +# filter_good: > +# project="${PUBSUB_PROJECT_ID}" +# metric.type="pubsub.googleapis.com/subscription/ack_message_count" +# resource.type="pubsub_subscription" +# filter_bad: > +# project="${PUBSUB_PROJECT_ID}" +# metric.type="pubsub.googleapis.com/subscription/num_outstanding_messages" +# resource.type="pubsub_subscription" From 59cf8bfd46106fb785e0627a39e14dac58412519 Mon Sep 17 00:00:00 2001 From: Olivier Cervello Date: Mon, 31 May 2021 13:58:36 +0200 Subject: [PATCH 3/8] Fix typo --- samples/custom/slo_custom_app_availability_query_sli.yaml | 1 - samples/custom/slo_custom_app_availability_ratio.yaml | 1 - 2 files changed, 2 deletions(-) diff --git a/samples/custom/slo_custom_app_availability_query_sli.yaml b/samples/custom/slo_custom_app_availability_query_sli.yaml index acfb64dd..94939438 100644 --- a/samples/custom/slo_custom_app_availability_query_sli.yaml +++ b/samples/custom/slo_custom_app_availability_query_sli.yaml @@ -21,5 +21,4 @@ backend: class: custom.custom_backend.CustomBackend method: query_sli exporters: -- class: custom.custom_exporter.CustomMetricExporter class: custom.custom_exporter.CustomSLOExporter diff --git a/samples/custom/slo_custom_app_availability_ratio.yaml b/samples/custom/slo_custom_app_availability_ratio.yaml index 6ba841f5..b3a59bb7 100644 --- a/samples/custom/slo_custom_app_availability_ratio.yaml +++ b/samples/custom/slo_custom_app_availability_ratio.yaml @@ -22,4 +22,3 @@ backend: method: good_bad_ratio exporters: - class: custom.custom_exporter.CustomMetricExporter - class: custom.custom_exporter.CustomSLOExporter From f770f77f10b1307a31a0016648e96f40baadc496 Mon Sep 17 00:00:00 2001 From: Olivier Cervello Date: Mon, 31 May 
2021 13:58:55 +0200 Subject: [PATCH 4/8] Revert "Fix typo" This reverts commit 59cf8bfd46106fb785e0627a39e14dac58412519. --- samples/custom/slo_custom_app_availability_query_sli.yaml | 1 + samples/custom/slo_custom_app_availability_ratio.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/samples/custom/slo_custom_app_availability_query_sli.yaml b/samples/custom/slo_custom_app_availability_query_sli.yaml index 94939438..acfb64dd 100644 --- a/samples/custom/slo_custom_app_availability_query_sli.yaml +++ b/samples/custom/slo_custom_app_availability_query_sli.yaml @@ -21,4 +21,5 @@ backend: class: custom.custom_backend.CustomBackend method: query_sli exporters: +- class: custom.custom_exporter.CustomMetricExporter class: custom.custom_exporter.CustomSLOExporter diff --git a/samples/custom/slo_custom_app_availability_ratio.yaml b/samples/custom/slo_custom_app_availability_ratio.yaml index b3a59bb7..6ba841f5 100644 --- a/samples/custom/slo_custom_app_availability_ratio.yaml +++ b/samples/custom/slo_custom_app_availability_ratio.yaml @@ -22,3 +22,4 @@ backend: method: good_bad_ratio exporters: - class: custom.custom_exporter.CustomMetricExporter + class: custom.custom_exporter.CustomSLOExporter From 0adc2e88d8371d1cc80e4797ec8cdbd0cf563eb5 Mon Sep 17 00:00:00 2001 From: Olivier Cervello Date: Mon, 31 May 2021 13:59:09 +0200 Subject: [PATCH 5/8] Fix typos --- samples/custom/slo_custom_app_availability_query_sli.yaml | 2 +- samples/custom/slo_custom_app_availability_ratio.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/custom/slo_custom_app_availability_query_sli.yaml b/samples/custom/slo_custom_app_availability_query_sli.yaml index acfb64dd..183f3b08 100644 --- a/samples/custom/slo_custom_app_availability_query_sli.yaml +++ b/samples/custom/slo_custom_app_availability_query_sli.yaml @@ -22,4 +22,4 @@ backend: method: query_sli exporters: - class: custom.custom_exporter.CustomMetricExporter - class: 
custom.custom_exporter.CustomSLOExporter +- class: custom.custom_exporter.CustomSLOExporter diff --git a/samples/custom/slo_custom_app_availability_ratio.yaml b/samples/custom/slo_custom_app_availability_ratio.yaml index 6ba841f5..6a8ac607 100644 --- a/samples/custom/slo_custom_app_availability_ratio.yaml +++ b/samples/custom/slo_custom_app_availability_ratio.yaml @@ -22,4 +22,4 @@ backend: method: good_bad_ratio exporters: - class: custom.custom_exporter.CustomMetricExporter - class: custom.custom_exporter.CustomSLOExporter +- class: custom.custom_exporter.CustomSLOExporter From f72c6543d2ed1697e96d3755d30b3f095a6f377d Mon Sep 17 00:00:00 2001 From: Olivier Cervello Date: Mon, 31 May 2021 14:47:24 +0200 Subject: [PATCH 6/8] Fix typos --- samples/config.yaml | 75 +++++++++++++++++++ .../slo_prom_metrics_latency_query_sli.yaml | 8 +- .../slo_pubsub_subscription_throughput.yaml | 4 - 3 files changed, 77 insertions(+), 10 deletions(-) create mode 100644 samples/config.yaml diff --git a/samples/config.yaml b/samples/config.yaml new file mode 100644 index 00000000..390e9ade --- /dev/null +++ b/samples/config.yaml @@ -0,0 +1,75 @@ +--- + +backends: + cloud_monitoring: + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + cloud_service_monitoring: + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + custom.custom_backend.CustomBackend: {} + datadog: + api_key: ${DATADOG_API_KEY} + app_key: ${DATADOG_APP_KEY} + dynatrace: + api_url: ${DYNATRACE_API_URL} + api_token: ${DYNATRACE_API_TOKEN} + elasticsearch: + url: ${ELASTICSEARCH_URL} + prometheus: + url: ${PROMETHEUS_URL} + +exporters: + bigquery: + project_id: rnm-shared-monitoring + dataset_id: slos + table_id: reports + cloud_monitoring: + project_id: ${STACKDRIVER_HOST_PROJECT_ID} + cloud_monitoring/test: + project_id: rnm-shared-monitoring + cloud_monitoring/test2: + project_id: ${PUBSUB_PROJECT_ID} + custom.custom_exporter.CustomMetricExporter: {} + custom.custom_exporter.CustomSLOExporter: {} + datadog: + api_key: 
${DATADOG_API_KEY} + app_key: ${DATADOG_APP_KEY} + dynatrace: + api_url: ${DYNATRACE_API_URL} + api_token: ${DYNATRACE_API_TOKEN} + metric_timeseries_id: custom:slo.error_budget_burn_rate + dynatrace/test: + api_url: ${DYNATRACE_API_URL} + api_token: ${DYNATRACE_API_TOKEN} + prometheus: + url: ${PROMETHEUS_PUSHGATEWAY_URL} + pubsub: + project_id: ${PUBSUB_PROJECT_ID} + topic_name: ${PUBSUB_TOPIC_NAME} + +error_budget_policies: + default: + steps: + - name: 1 hour + burn_rate_threshold: 9 + alert: true + message_alert: Page to defend the SLO + message_ok: Last hour on track + window: 3600 + - name: 12 hours + burn_rate_threshold: 3 + alert: true + message_alert: Page to defend the SLO + message_ok: Last 12 hours on track + window: 43200 + - name: 7 days + burn_rate_threshold: 1.5 + alert: false + message_alert: Dev team dedicates 25% of engineers to the reliability backlog + message_ok: Last week on track + window: 604800 + - name: 28 days + burn_rate_threshold: 1 + alert: false + message_alert: Freeze release, unless related to reliability or security + message_ok: Unfreeze release, per the agreed roll-out policy + window: 2419200 diff --git a/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml b/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml index 5b384404..e6de9ca1 100644 --- a/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml +++ b/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml @@ -31,9 +31,5 @@ backend: http_request_duration_seconds_count{handler="/metrics", code=~"2.."}[window] ) exporters: - - class: Bigquery - project_id: rnm-shared-monitoring - dataset_id: slos - table_id: reports - - class: Stackdriver - project_id: rnm-shared-monitoring +- class: Prometheus + url: ${PROMETHEUS_PUSHGATEWAY_URL} diff --git a/samples/stackdriver/slo_pubsub_subscription_throughput.yaml b/samples/stackdriver/slo_pubsub_subscription_throughput.yaml index 58ab1943..08d57bd7 100644 --- 
a/samples/stackdriver/slo_pubsub_subscription_throughput.yaml +++ b/samples/stackdriver/slo_pubsub_subscription_throughput.yaml @@ -33,7 +33,3 @@ backend: exporters: - class: Stackdriver project_id: ${STACKDRIVER_HOST_PROJECT_ID} - -- class: Pubsub - project_id: ${PUBSUB_PROJECT_ID} - topic_name: ${PUBSUB_TOPIC_NAME} From dff27899fd89c4f23201625aada0282968865094 Mon Sep 17 00:00:00 2001 From: Olivier Cervello Date: Mon, 31 May 2021 17:22:10 +0200 Subject: [PATCH 7/8] Migrate all samples --- .../slo_gae_app_availability.yaml | 35 +++++++++++ .../slo_gae_app_latency.yaml | 0 .../slo_lb_request_availability.yaml | 27 +++++++++ .../slo_lb_request_latency.yaml | 25 ++++++++ .../slo_pubsub_subscription_throughput.yaml | 24 ++++++++ .../slo_gae_app_availability.yaml | 36 +++++++++++ .../slo_gae_app_availability_basic.yaml | 20 +++++++ .../slo_gae_app_latency.yaml | 24 ++++++++ .../slo_gae_app_latency_basic.yaml | 21 +++++++ .../slo_gke_app_availability_basic.yaml | 21 +++++++ ...gke_app_availability_basic_deprecated.yaml | 23 +++++++ .../slo_gke_app_latency_basic.yaml | 22 +++++++ .../slo_gke_app_latency_basic_deprecated.yaml | 24 ++++++++ .../slo_lb_request_availability.yaml | 27 +++++++++ .../slo_lb_request_latency.yaml | 25 ++++++++ samples/config.yaml | 20 +++++-- ...slo_custom_app_availability_query_sli.yaml | 42 ++++++------- .../slo_custom_app_availability_ratio.yaml | 42 ++++++------- .../slo_dd_app_availability_query_sli.yaml | 49 ++++++--------- .../slo_dd_app_availability_query_slo.yaml | 43 +++++-------- .../slo_dd_app_availability_ratio.yaml | 50 ++++++---------- .../slo_dt_app_availability_ratio.yaml | 54 +++++++---------- .../slo_dt_app_latency_threshold.yaml | 51 ++++++---------- samples/elasticsearch/slo_elk_test_ratio.yaml | 60 +++++++------------ samples/error_budget_policy.yaml | 43 ------------- ...o_prom_metrics_availability_query_sli.yaml | 49 ++++++--------- .../slo_prom_metrics_availability_ratio.yaml | 49 ++++++--------- 
...prom_metrics_latency_distribution_cut.yaml | 45 ++++++-------- .../slo_prom_metrics_latency_query_sli.yaml | 45 ++++++-------- .../stackdriver/slo_gae_app_availability.yaml | 46 -------------- .../slo_lb_request_availability.yaml | 38 ------------ .../stackdriver/slo_lb_request_latency.yaml | 36 ----------- .../slo_pubsub_subscription_throughput.yaml | 35 ----------- .../slo_gae_app_availability.yaml | 44 -------------- .../slo_gae_app_availability_basic.yaml | 28 --------- .../slo_gae_app_latency.yaml | 32 ---------- .../slo_gae_app_latency_basic.yaml | 29 --------- .../slo_gke_app_availability_basic.yaml | 29 --------- ...gke_app_availability_basic_deprecated.yaml | 31 ---------- .../slo_gke_app_latency_basic.yaml | 30 ---------- .../slo_gke_app_latency_basic_deprecated.yaml | 32 ---------- .../slo_lb_request_availability.yaml | 35 ----------- .../slo_lb_request_latency.yaml | 33 ---------- ...zz_slo_pubsub_subscription_throughput.yaml | 35 ----------- 44 files changed, 584 insertions(+), 925 deletions(-) create mode 100644 samples/cloud_monitoring/slo_gae_app_availability.yaml rename samples/{stackdriver => cloud_monitoring}/slo_gae_app_latency.yaml (100%) create mode 100644 samples/cloud_monitoring/slo_lb_request_availability.yaml create mode 100644 samples/cloud_monitoring/slo_lb_request_latency.yaml create mode 100644 samples/cloud_monitoring/slo_pubsub_subscription_throughput.yaml create mode 100644 samples/cloud_service_monitoring/slo_gae_app_availability.yaml create mode 100644 samples/cloud_service_monitoring/slo_gae_app_availability_basic.yaml create mode 100644 samples/cloud_service_monitoring/slo_gae_app_latency.yaml create mode 100644 samples/cloud_service_monitoring/slo_gae_app_latency_basic.yaml create mode 100644 samples/cloud_service_monitoring/slo_gke_app_availability_basic.yaml create mode 100644 samples/cloud_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml create mode 100644 
samples/cloud_service_monitoring/slo_gke_app_latency_basic.yaml create mode 100644 samples/cloud_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml create mode 100644 samples/cloud_service_monitoring/slo_lb_request_availability.yaml create mode 100644 samples/cloud_service_monitoring/slo_lb_request_latency.yaml delete mode 100644 samples/error_budget_policy.yaml delete mode 100644 samples/stackdriver/slo_gae_app_availability.yaml delete mode 100644 samples/stackdriver/slo_lb_request_availability.yaml delete mode 100644 samples/stackdriver/slo_lb_request_latency.yaml delete mode 100644 samples/stackdriver/slo_pubsub_subscription_throughput.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gae_app_availability.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gae_app_availability_basic.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gae_app_latency.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gae_app_latency_basic.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gke_app_availability_basic.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gke_app_latency_basic.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_lb_request_availability.yaml delete mode 100644 samples/stackdriver_service_monitoring/slo_lb_request_latency.yaml delete mode 100644 samples/stackdriver_service_monitoring/zzz_slo_pubsub_subscription_throughput.yaml diff --git a/samples/cloud_monitoring/slo_gae_app_availability.yaml b/samples/cloud_monitoring/slo_gae_app_availability.yaml new file mode 100644 index 00000000..675f290f --- /dev/null +++ b/samples/cloud_monitoring/slo_gae_app_availability.yaml @@ -0,0 +1,35 @@ +apiVersion: sre.google.com/v2 +kind: 
ServiceLevelObjective +metadata: + name: gae-app-availability + labels: + service_name: gae + feature_name: app + slo_name: availability +spec: + description: Availability of App Engine app + backend: cloud_monitoring + method: good_bad_ratio + exporters: + - cloud_monitoring + service_level_indicator: + filter_good: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_count" + resource.type="gae_app" + ( metric.labels.response_code = 429 OR + metric.labels.response_code = 200 OR + metric.labels.response_code = 201 OR + metric.labels.response_code = 202 OR + metric.labels.response_code = 203 OR + metric.labels.response_code = 204 OR + metric.labels.response_code = 205 OR + metric.labels.response_code = 206 OR + metric.labels.response_code = 207 OR + metric.labels.response_code = 208 OR + metric.labels.response_code = 226 OR + metric.labels.response_code = 304 ) + filter_valid: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_count" + goal: 0.95 diff --git a/samples/stackdriver/slo_gae_app_latency.yaml b/samples/cloud_monitoring/slo_gae_app_latency.yaml similarity index 100% rename from samples/stackdriver/slo_gae_app_latency.yaml rename to samples/cloud_monitoring/slo_gae_app_latency.yaml diff --git a/samples/cloud_monitoring/slo_lb_request_availability.yaml b/samples/cloud_monitoring/slo_lb_request_availability.yaml new file mode 100644 index 00000000..0a9b6f57 --- /dev/null +++ b/samples/cloud_monitoring/slo_lb_request_availability.yaml @@ -0,0 +1,27 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: lb-request-availability + labels: + service_name: lb + feature_name: request + slo_name: availability +spec: + description: Availability of HTTP Load Balancer + backend: cloud_monitoring + method: good_bad_ratio + exporters: + - cloud_monitoring + service_level_indicator: + filter_good: > + project=${LB_PROJECT_ID} + 
metric.type="loadbalancing.googleapis.com/https/request_count" + resource.type="https_lb_rule" + ( metric.label.response_code_class="200" OR + metric.label.response_code_class="300" OR + metric.label.response_code_class="400" ) + filter_valid: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/request_count" + resource.type="https_lb_rule" + goal: 0.98 diff --git a/samples/cloud_monitoring/slo_lb_request_latency.yaml b/samples/cloud_monitoring/slo_lb_request_latency.yaml new file mode 100644 index 00000000..56df0de2 --- /dev/null +++ b/samples/cloud_monitoring/slo_lb_request_latency.yaml @@ -0,0 +1,25 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: lb-request-latency724ms + labels: + service_name: lb + feature_name: request + slo_name: latency724ms +spec: + description: Latency of HTTP Load Balancer < 724ms + backend: cloud_monitoring + method: distribution_cut + exporters: + - cloud_monitoring + service_level_indicator: + filter_valid: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/total_latencies" + resource.type="https_lb_rule" + ( metric.label.response_code_class="200" OR + metric.label.response_code_class="300" OR + metric.label.response_code_class="400" ) + good_below_threshold: true + threshold_bucket: 19 + goal: 0.98 diff --git a/samples/cloud_monitoring/slo_pubsub_subscription_throughput.yaml b/samples/cloud_monitoring/slo_pubsub_subscription_throughput.yaml new file mode 100644 index 00000000..917e6fed --- /dev/null +++ b/samples/cloud_monitoring/slo_pubsub_subscription_throughput.yaml @@ -0,0 +1,24 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: pubsub-subscription-throughput + labels: + service_name: pubsub + feature_name: subscription + slo_name: throughput +spec: + description: Throughput of Pub/Sub subscription + backend: cloud_monitoring + method: good_bad_ratio + exporters: + - cloud_monitoring + service_level_indicator: + 
filter_good: > + project="${PUBSUB_PROJECT_ID}" + metric.type="pubsub.googleapis.com/subscription/ack_message_count" + resource.type="pubsub_subscription" + filter_bad: > + project="${PUBSUB_PROJECT_ID}" + metric.type="pubsub.googleapis.com/subscription/num_outstanding_messages" + resource.type="pubsub_subscription" + goal: 0.95 diff --git a/samples/cloud_service_monitoring/slo_gae_app_availability.yaml b/samples/cloud_service_monitoring/slo_gae_app_availability.yaml new file mode 100644 index 00000000..a2e97de3 --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gae_app_availability.yaml @@ -0,0 +1,36 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gae-app-availability + labels: + service_name: gae + feature_name: app + slo_name: availability +spec: + description: Availability of App Engine app + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: good_bad_ratio + exporters: [] + service_level_indicator: + filter_good: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_count" + resource.type="gae_app" + ( metric.labels.response_code = 429 OR + metric.labels.response_code = 200 OR + metric.labels.response_code = 201 OR + metric.labels.response_code = 202 OR + metric.labels.response_code = 203 OR + metric.labels.response_code = 204 OR + metric.labels.response_code = 205 OR + metric.labels.response_code = 206 OR + metric.labels.response_code = 207 OR + metric.labels.response_code = 208 OR + metric.labels.response_code = 226 OR + metric.labels.response_code = 304 ) + filter_valid: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_count" + resource.type="gae_app" + goal: 0.95 diff --git a/samples/cloud_service_monitoring/slo_gae_app_availability_basic.yaml b/samples/cloud_service_monitoring/slo_gae_app_availability_basic.yaml new file mode 100644 index 00000000..dd6507ff --- /dev/null +++ 
b/samples/cloud_service_monitoring/slo_gae_app_availability_basic.yaml @@ -0,0 +1,20 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gae-app-availability + labels: + service_name: gae + feature_name: app + slo_name: availability +spec: + description: Availability of App Engine app + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: basic + exporters: [] + service_level_indicator: + app_engine: + project_id: ${GAE_PROJECT_ID} + module_id: ${GAE_MODULE_ID} + availability: {} + goal: 0.98 diff --git a/samples/cloud_service_monitoring/slo_gae_app_latency.yaml b/samples/cloud_service_monitoring/slo_gae_app_latency.yaml new file mode 100644 index 00000000..c949fb88 --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gae_app_latency.yaml @@ -0,0 +1,24 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gae-app-latency724ms + labels: + service_name: gae + feature_name: app + slo_name: latency724ms +spec: + description: Latency of App Engine app requests < 724ms + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: distribution_cut + exporters: [] + service_level_indicator: + filter_valid: > + project=${GAE_PROJECT_ID} + metric.type="appengine.googleapis.com/http/server/response_latencies" + resource.type="gae_app" + metric.labels.response_code >= 200 + metric.labels.response_code < 500 + range_min: 0 + range_max: 724 + goal: 0.999 diff --git a/samples/cloud_service_monitoring/slo_gae_app_latency_basic.yaml b/samples/cloud_service_monitoring/slo_gae_app_latency_basic.yaml new file mode 100644 index 00000000..e648e99d --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gae_app_latency_basic.yaml @@ -0,0 +1,21 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gae-app-latency724ms + labels: + service_name: gae + feature_name: app + slo_name: latency724ms +spec: + description: Latency of App 
Engine app requests < 724ms + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: basic + exporters: [] + service_level_indicator: + app_engine: + project_id: ${GAE_PROJECT_ID} + module_id: ${GAE_MODULE_ID} + latency: + threshold: 724 # ms + goal: 0.999 diff --git a/samples/cloud_service_monitoring/slo_gke_app_availability_basic.yaml b/samples/cloud_service_monitoring/slo_gke_app_availability_basic.yaml new file mode 100644 index 00000000..ab4b675c --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gke_app_availability_basic.yaml @@ -0,0 +1,21 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gke-service-availability + labels: + service_name: gke + feature_name: service + slo_name: availability +spec: + description: Availability of GKE service + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: basic + exporters: [] + service_level_indicator: + mesh_istio: + mesh_uid: ${GKE_MESH_UID} + service_namespace: ${GKE_SERVICE_NAMESPACE} + service_name: ${GKE_SERVICE_NAME} + availability: {} + goal: 0.98 diff --git a/samples/cloud_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml b/samples/cloud_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml new file mode 100644 index 00000000..16fc4ec0 --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml @@ -0,0 +1,23 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gke-service-availability + labels: + service_name: gke + feature_name: service + slo_name: availability +spec: + description: Availability of GKE service + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: basic + exporters: [] + service_level_indicator: + cluster_istio: + project_id: ${GKE_PROJECT_ID} + zone: ${GKE_LOCATION} + cluster_name: ${GKE_CLUSTER_NAME} + service_namespace: ${GKE_SERVICE_NAMESPACE} 
+ service_name: ${GKE_SERVICE_NAME} + availability: {} + goal: 0.98 diff --git a/samples/cloud_service_monitoring/slo_gke_app_latency_basic.yaml b/samples/cloud_service_monitoring/slo_gke_app_latency_basic.yaml new file mode 100644 index 00000000..b3acf134 --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gke_app_latency_basic.yaml @@ -0,0 +1,22 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gke-service-latency724ms + labels: + service_name: gke + feature_name: service + slo_name: latency724ms +spec: + description: Latency of GKE service requests < 724ms + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: basic + exporters: [] + service_level_indicator: + mesh_istio: + mesh_uid: ${GKE_MESH_UID} + service_namespace: ${GKE_SERVICE_NAMESPACE} + service_name: ${GKE_SERVICE_NAME} + latency: + threshold: 724 # ms + goal: 0.999 diff --git a/samples/cloud_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml b/samples/cloud_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml new file mode 100644 index 00000000..ced3999d --- /dev/null +++ b/samples/cloud_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml @@ -0,0 +1,24 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gke-service-latency724ms + labels: + service_name: gke + feature_name: service + slo_name: latency724ms +spec: + description: Latency of GKE service requests < 724ms + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: basic + exporters: [] + service_level_indicator: + cluster_istio: + project_id: ${GKE_PROJECT_ID} + zone: ${GKE_LOCATION} + cluster_name: ${GKE_CLUSTER_NAME} + service_namespace: ${GKE_SERVICE_NAMESPACE} + service_name: ${GKE_SERVICE_NAME} + latency: + threshold: 724 # ms + goal: 0.999 diff --git a/samples/cloud_service_monitoring/slo_lb_request_availability.yaml 
b/samples/cloud_service_monitoring/slo_lb_request_availability.yaml new file mode 100644 index 00000000..6bc1df9c --- /dev/null +++ b/samples/cloud_service_monitoring/slo_lb_request_availability.yaml @@ -0,0 +1,27 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: lb-request-availability + labels: + service_name: lb + feature_name: request + slo_name: availability +spec: + description: Availability of HTTP Load Balancer + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: good_bad_ratio + exporters: [] + service_level_indicator: + filter_good: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/request_count" + resource.type="https_lb_rule" + ( metric.label.response_code_class="200" OR + metric.label.response_code_class="300" OR + metric.label.response_code_class="400" ) + filter_valid: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/request_count" + resource.type="https_lb_rule" + goal: 0.98 diff --git a/samples/cloud_service_monitoring/slo_lb_request_latency.yaml b/samples/cloud_service_monitoring/slo_lb_request_latency.yaml new file mode 100644 index 00000000..2b326176 --- /dev/null +++ b/samples/cloud_service_monitoring/slo_lb_request_latency.yaml @@ -0,0 +1,25 @@ +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: lb-request-latency724ms + labels: + service_name: lb + feature_name: request + slo_name: latency724ms +spec: + description: Latency of HTTP Load Balancer < 724ms + error_budget_policy: cloud_service_monitoring + backend: cloud_service_monitoring + method: distribution_cut + exporters: [] + service_level_indicator: + filter_valid: > + project=${LB_PROJECT_ID} + metric.type="loadbalancing.googleapis.com/https/total_latencies" + resource.type="https_lb_rule" + ( metric.label.response_code_class="200" OR + metric.label.response_code_class="300" OR + metric.label.response_code_class="400" ) + 
range_min: 0 + range_max: 724 # ms + goal: 0.98 diff --git a/samples/config.yaml b/samples/config.yaml index 390e9ade..ca8e7fef 100644 --- a/samples/config.yaml +++ b/samples/config.yaml @@ -18,15 +18,9 @@ backends: url: ${PROMETHEUS_URL} exporters: - bigquery: - project_id: rnm-shared-monitoring - dataset_id: slos - table_id: reports cloud_monitoring: project_id: ${STACKDRIVER_HOST_PROJECT_ID} cloud_monitoring/test: - project_id: rnm-shared-monitoring - cloud_monitoring/test2: project_id: ${PUBSUB_PROJECT_ID} custom.custom_exporter.CustomMetricExporter: {} custom.custom_exporter.CustomSLOExporter: {} @@ -73,3 +67,17 @@ error_budget_policies: message_alert: Freeze release, unless related to reliability or security message_ok: Unfreeze release, per the agreed roll-out policy window: 2419200 + cloud_service_monitoring: + steps: + - name: 24 hours + burn_rate_threshold: 4 + alert: true + message_alert: Page to defend the SLO + message_ok: Last 24 hours on track + window: 86400 + - name: 48 hours + burn_rate_threshold: 2 + alert: true + message_alert: Page to defend the SLO + message_ok: Last 48 hours on track + window: 172800 diff --git a/samples/custom/slo_custom_app_availability_query_sli.yaml b/samples/custom/slo_custom_app_availability_query_sli.yaml index 183f3b08..4869144d 100644 --- a/samples/custom/slo_custom_app_availability_query_sli.yaml +++ b/samples/custom/slo_custom_app_availability_query_sli.yaml @@ -1,25 +1,17 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: custom -feature_name: test -slo_description: 99.99% of fake requests to custom backends are valid -slo_name: availability-sli -slo_target: 0.999 -backend: - class: custom.custom_backend.CustomBackend - method: query_sli -exporters: -- class: custom.custom_exporter.CustomMetricExporter -- class: custom.custom_exporter.CustomSLOExporter +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: custom-test-availability-sli + labels: + service_name: custom + feature_name: test + slo_name: availability-sli +spec: + description: 99.9% of fake requests to custom backends are valid + backend: custom.custom_backend.CustomBackend + method: query_sli + exporters: + - custom.custom_exporter.CustomMetricExporter + - custom.custom_exporter.CustomSLOExporter + service_level_indicator: {} + goal: 0.999 diff --git a/samples/custom/slo_custom_app_availability_ratio.yaml b/samples/custom/slo_custom_app_availability_ratio.yaml index 6a8ac607..06a9c957 100644 --- a/samples/custom/slo_custom_app_availability_ratio.yaml +++ b/samples/custom/slo_custom_app_availability_ratio.yaml @@ -1,25 +1,17 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: custom -feature_name: test -slo_description: 99.99% of fake requests to custom backends are valid -slo_name: availability-ratio -slo_target: 0.999 -backend: - class: custom.custom_backend.CustomBackend - method: good_bad_ratio -exporters: -- class: custom.custom_exporter.CustomMetricExporter -- class: custom.custom_exporter.CustomSLOExporter +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: custom-test-availability-ratio + labels: + service_name: custom + feature_name: test + slo_name: availability-ratio +spec: + description: 99.9% of fake requests to custom backends are valid + backend: custom.custom_backend.CustomBackend + method: good_bad_ratio + exporters: + - custom.custom_exporter.CustomMetricExporter + - custom.custom_exporter.CustomSLOExporter + service_level_indicator: {} + goal: 0.999 diff --git a/samples/datadog/slo_dd_app_availability_query_sli.yaml b/samples/datadog/slo_dd_app_availability_query_sli.yaml index 47ad78b5..2fe9ed36 100644 --- a/samples/datadog/slo_dd_app_availability_query_sli.yaml +++ b/samples/datadog/slo_dd_app_availability_query_sli.yaml @@ -1,31 +1,18 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: dd -feature_name: app -slo_name: availability -slo_description: 99% of app requests return a valid HTTP code -slo_target: 0.99 -backend: - class: Datadog - method: query_sli - api_key: ${DATADOG_API_KEY} - app_key: ${DATADOG_APP_KEY} - # api_host: api.datadoghq.eu # uncomment to use EU site - measurement: - query: sum:app.requests.count{http.path:/, http.status_code_class:2xx}.as_count() / sum:app.requests.count{http.path:/}.as_count() -exporters: - - class: Datadog - api_key: ${DATADOG_API_KEY} - app_key: ${DATADOG_APP_KEY} +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: dd-app-availability + labels: + service_name: dd + feature_name: app + slo_name: availability +spec: + description: 99% of app requests return a valid HTTP code + backend: datadog + method: query_sli + exporters: + - datadog + service_level_indicator: + query: sum:app.requests.count{http.path:/, http.status_code_class:2xx}.as_count() + / sum:app.requests.count{http.path:/}.as_count() + goal: 0.99 diff --git a/samples/datadog/slo_dd_app_availability_query_slo.yaml b/samples/datadog/slo_dd_app_availability_query_slo.yaml index bc1c5b19..3465a64d 100644 --- a/samples/datadog/slo_dd_app_availability_query_slo.yaml +++ b/samples/datadog/slo_dd_app_availability_query_slo.yaml @@ -1,27 +1,16 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: dd -feature_name: app -slo_name: availability -slo_description: 99% of app requests return a valid HTTP code -slo_target: 0.99 -backend: - class: Datadog - method: query_slo - api_key: ${DATADOG_API_KEY} - app_key: ${DATADOG_APP_KEY} - # api_host: api.datadoghq.eu # uncomment to use EU site - measurement: - slo_id: ${DATADOG_SLO_ID} +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: dd-app-availability + labels: + service_name: dd + feature_name: app + slo_name: availability +spec: + description: 99% of app requests return a valid HTTP code + backend: datadog + method: query_slo + exporters: [] + service_level_indicator: + slo_id: ${DATADOG_SLO_ID} + goal: 0.99 diff --git a/samples/datadog/slo_dd_app_availability_ratio.yaml b/samples/datadog/slo_dd_app_availability_ratio.yaml index e524b23d..aa8be867 100644 --- a/samples/datadog/slo_dd_app_availability_ratio.yaml +++ b/samples/datadog/slo_dd_app_availability_ratio.yaml @@ -1,32 +1,18 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: dd -feature_name: app -slo_name: availability -slo_description: 99% of app requests return a valid HTTP code -slo_target: 0.99 -backend: - class: Datadog - method: good_bad_ratio - api_key: ${DATADOG_API_KEY} - app_key: ${DATADOG_APP_KEY} - # api_host: api.datadoghq.eu # uncomment to use EU site - measurement: - query_good: app.requests.count{http.path:/, http.status_code_class:2xx} - query_valid: app.requests.count{http.path:/} -exporters: - - class: Datadog - api_key: ${DATADOG_API_KEY} - app_key: ${DATADOG_APP_KEY} +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: dd-app-availability + labels: + service_name: dd + feature_name: app + slo_name: availability +spec: + description: 99% of app requests return a valid HTTP code + backend: datadog + method: good_bad_ratio + exporters: + - datadog + service_level_indicator: + query_good: app.requests.count{http.path:/, http.status_code_class:2xx} + query_valid: app.requests.count{http.path:/} + goal: 0.99 diff --git a/samples/dynatrace/slo_dt_app_availability_ratio.yaml b/samples/dynatrace/slo_dt_app_availability_ratio.yaml index ac705550..6a678abc 100644 --- a/samples/dynatrace/slo_dt_app_availability_ratio.yaml +++ b/samples/dynatrace/slo_dt_app_availability_ratio.yaml @@ -1,36 +1,22 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: dt -feature_name: app -slo_name: availability -slo_description: 99.9% of app requests return a good HTTP code -slo_target: 0.999 -backend: - class: Dynatrace - method: good_bad_ratio - api_url: ${DYNATRACE_API_URL} - api_token: ${DYNATRACE_API_TOKEN} - measurement: +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: dt-app-availability + labels: + service_name: dt + feature_name: app + slo_name: availability +spec: + description: 99.9% of app requests return a good HTTP code + backend: dynatrace + method: good_bad_ratio + exporters: + - dynatrace + service_level_indicator: query_good: - metric_selector: ext:app.request_count:filter(and(eq(app,test_app),eq(env,prod),eq(status_code_class,2xx))) - entity_selector: type(HOST) + metric_selector: ext:app.request_count:filter(and(eq(app,test_app),eq(env,prod),eq(status_code_class,2xx))) + entity_selector: type(HOST) query_valid: - metric_selector: ext:app.request_count:filter(and(eq(app,test_app),eq(env,prod))) - entity_selector: type(HOST) -exporters: -- class: Dynatrace - api_url: ${DYNATRACE_API_URL} - api_token: ${DYNATRACE_API_TOKEN} - metric_timeseries_id: custom:slo.error_budget_burn_rate + metric_selector: ext:app.request_count:filter(and(eq(app,test_app),eq(env,prod))) + entity_selector: type(HOST) + goal: 0.999 diff --git a/samples/dynatrace/slo_dt_app_latency_threshold.yaml b/samples/dynatrace/slo_dt_app_latency_threshold.yaml index bd97bb3d..0eb77aec 100644 --- a/samples/dynatrace/slo_dt_app_latency_threshold.yaml +++ b/samples/dynatrace/slo_dt_app_latency_threshold.yaml @@ -1,33 +1,20 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: dt -feature_name: app -slo_name: latency -slo_description: 99.9% of app 2xx requests return within 50ms -slo_target: 0.999 -backend: - class: Dynatrace - method: threshold - api_url: ${DYNATRACE_API_URL} - api_token: ${DYNATRACE_API_TOKEN} - measurement: +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: dt-app-latency + labels: + service_name: dt + feature_name: app + slo_name: latency +spec: + description: 99.9% of app 2xx requests return within 50ms + backend: dynatrace + method: threshold + exporters: + - dynatrace/test + service_level_indicator: query_valid: - metric_selector: ext:app.request_latency:filter(and(eq(app,test_app),eq(env,prod),eq(status_code_class,2xx))) - entity_selector: type(HOST) - threshold: 50000 # us -exporters: -- class: Dynatrace - api_url: ${DYNATRACE_API_URL} - api_token: ${DYNATRACE_API_TOKEN} + metric_selector: ext:app.request_latency:filter(and(eq(app,test_app),eq(env,prod),eq(status_code_class,2xx))) + entity_selector: type(HOST) + threshold: 50000 # us + goal: 0.999 diff --git a/samples/elasticsearch/slo_elk_test_ratio.yaml b/samples/elasticsearch/slo_elk_test_ratio.yaml index 2114f902..adb834c0 100644 --- a/samples/elasticsearch/slo_elk_test_ratio.yaml +++ b/samples/elasticsearch/slo_elk_test_ratio.yaml @@ -1,41 +1,27 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: elk -feature_name: test -slo_description: > - SLO for random test data generated with the - https://github.com/oliver006/elasticsearch-test-data -slo_name: errors -slo_target: 1 -backend: - class: Elasticsearch - url: ${ELASTICSEARCH_URL} - method: good_bad_ratio - measurement: - index: test_data - date_field: last_updated - query_good: {} +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: elk-test-errors + labels: + service_name: elk + feature_name: test + slo_name: errors +spec: + description: > + SLO for random test data generated with the + https://github.com/oliver006/elasticsearch-test-data + backend: elasticsearch + method: good_bad_ratio + exporters: + - pubsub + - cloud_monitoring/test + service_level_indicator: + index: test_data + date_field: last_updated + query_good: {} query_bad: must: term: - name: JAgOZE8 + name: JAgOZE8 -exporters: -- class: Pubsub - project_id: ${PUBSUB_PROJECT_ID} - topic_name: ${PUBSUB_TOPIC_NAME} - -- class: Stackdriver - project_id: ${PUBSUB_PROJECT_ID} + goal: 1 diff --git a/samples/error_budget_policy.yaml b/samples/error_budget_policy.yaml deleted file mode 100644 index d491fc97..00000000 --- a/samples/error_budget_policy.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -- error_budget_policy_step_name: 1 hour - measurement_window_seconds: 3600 - alerting_burn_rate_threshold: 9 - urgent_notification: true - overburned_consequence_message: Page to defend the SLO - achieved_consequence_message: Last hour on track - -- error_budget_policy_step_name: 12 hours - measurement_window_seconds: 43200 - alerting_burn_rate_threshold: 3 - urgent_notification: true - overburned_consequence_message: Page to defend the SLO - achieved_consequence_message: Last 12 hours on track - -- error_budget_policy_step_name: 7 days - measurement_window_seconds: 604800 - alerting_burn_rate_threshold: 1.5 - urgent_notification: false - overburned_consequence_message: Dev team dedicates 25% of engineers to the - reliability backlog - achieved_consequence_message: Last week on track - -- error_budget_policy_step_name: 28 days - measurement_window_seconds: 2419200 - alerting_burn_rate_threshold: 1 - urgent_notification: false - overburned_consequence_message: Freeze release, unless related to reliability - or security - achieved_consequence_message: Unfreeze release, per the agreed roll-out policy diff --git a/samples/prometheus/slo_prom_metrics_availability_query_sli.yaml b/samples/prometheus/slo_prom_metrics_availability_query_sli.yaml index 05076a04..7f4f6718 100644 --- a/samples/prometheus/slo_prom_metrics_availability_query_sli.yaml +++ b/samples/prometheus/slo_prom_metrics_availability_query_sli.yaml @@ -1,35 +1,20 @@ -# Copyright 2019 Google Inc. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: prom -feature_name: metrics -slo_name: availability -slo_description: 99.9% of Prometheus requests return a good HTTP code -slo_target: 0.999 -backend: - class: Prometheus - method: query_sli - url: ${PROMETHEUS_URL} - # Basic auth example: - # headers: - # Content-Type: application/json - # Authorization: Basic b2s6cGFzcW== # username:password base64-encoded - measurement: - expression: > +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: prom-metrics-availability + labels: + service_name: prom + feature_name: metrics + slo_name: availability +spec: + description: 99.9% of Prometheus requests return a good HTTP code + backend: prometheus + method: query_sli + exporters: + - prometheus + service_level_indicator: + expression: > sum(rate(prometheus_http_requests_total{handler="/metrics", code=~"2.."}[window])) / sum(rate(prometheus_http_requests_total{handler="/metrics"}[window])) -exporters: -- class: Prometheus - url: ${PROMETHEUS_PUSHGATEWAY_URL} + goal: 0.999 diff --git a/samples/prometheus/slo_prom_metrics_availability_ratio.yaml b/samples/prometheus/slo_prom_metrics_availability_ratio.yaml index 23b5171e..7e4db0cf 100644 --- a/samples/prometheus/slo_prom_metrics_availability_ratio.yaml +++ b/samples/prometheus/slo_prom_metrics_availability_ratio.yaml @@ -1,34 +1,19 @@ -# Copyright 2019 Google Inc. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: prom -feature_name: metrics -slo_name: availability -slo_description: 99.9% of Prometheus requests return a good HTTP code -slo_target: 0.999 -backend: - class: Prometheus - method: good_bad_ratio - url: ${PROMETHEUS_URL} - # Basic auth example: - # headers: - # Content-Type: application/json - # Authorization: Basic b2s6cGFzcW== # username:password base64-encoded - measurement: - filter_good: prometheus_http_requests_total{handler="/metrics", code=~"2.."} +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: prom-metrics-availability + labels: + service_name: prom + feature_name: metrics + slo_name: availability +spec: + description: 99.9% of Prometheus requests return a good HTTP code + backend: prometheus + method: good_bad_ratio + exporters: + - prometheus + service_level_indicator: + filter_good: prometheus_http_requests_total{handler="/metrics", code=~"2.."} filter_valid: prometheus_http_requests_total{handler="/metrics"} # filter_bad: prometheus_http_requests_total{code=~"5..", handler="/metrics"} # alternative to filter_valid field -exporters: -- class: Prometheus - url: ${PROMETHEUS_PUSHGATEWAY_URL} + goal: 0.999 diff --git a/samples/prometheus/slo_prom_metrics_latency_distribution_cut.yaml b/samples/prometheus/slo_prom_metrics_latency_distribution_cut.yaml index b2b8da92..95ea93d2 100644 --- a/samples/prometheus/slo_prom_metrics_latency_distribution_cut.yaml +++ 
b/samples/prometheus/slo_prom_metrics_latency_distribution_cut.yaml @@ -1,29 +1,18 @@ -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: prom -feature_name: metrics -slo_description: 99.99% of Prometheus requests return in less than 250ms -slo_name: latency -slo_target: 0.9999 -backend: - class: Prometheus - url: ${PROMETHEUS_URL} - method: distribution_cut - measurement: - expression: http_request_duration_seconds_bucket{handler="/metrics", code=~"2.."} +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: prom-metrics-latency + labels: + service_name: prom + feature_name: metrics + slo_name: latency +spec: + description: 99.99% of Prometheus requests return in less than 250ms + backend: prometheus + method: distribution_cut + exporters: + - prometheus + service_level_indicator: + expression: http_request_duration_seconds_bucket{handler="/metrics", code=~"2.."} threshold_bucket: 0.25 # in seconds, corresponds to the `le` (less than) PromQL label -exporters: -- class: Prometheus - url: ${PROMETHEUS_PUSHGATEWAY_URL} + goal: 0.9999 diff --git a/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml b/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml index e6de9ca1..35a2bd92 100644 --- a/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml +++ b/samples/prometheus/slo_prom_metrics_latency_query_sli.yaml @@ -1,28 +1,19 @@ -# Copyright 2020 Google LLC -# -# Licensed under the Apache 
License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: prom -feature_name: metrics -slo_description: 99.99% of Prometheus requests return in less than 250ms -slo_name: latency -slo_target: 0.9999 -backend: - class: Prometheus - url: ${PROMETHEUS_URL} - method: query_sli - measurement: - expression: > +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: prom-metrics-latency + labels: + service_name: prom + feature_name: metrics + slo_name: latency +spec: + description: 99.99% of Prometheus requests return in less than 250ms + backend: prometheus + method: query_sli + exporters: + - prometheus + service_level_indicator: + expression: > increase( http_request_duration_seconds_bucket{handler="/metrics", code=~"2..",le="0.25"}[window] ) @@ -30,6 +21,4 @@ backend: increase( http_request_duration_seconds_count{handler="/metrics", code=~"2.."}[window] ) -exporters: -- class: Prometheus - url: ${PROMETHEUS_PUSHGATEWAY_URL} + goal: 0.9999 diff --git a/samples/stackdriver/slo_gae_app_availability.yaml b/samples/stackdriver/slo_gae_app_availability.yaml deleted file mode 100644 index 9da3ab32..00000000 --- a/samples/stackdriver/slo_gae_app_availability.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: gae -feature_name: app -slo_description: Availability of App Engine app -slo_name: availability -slo_target: 0.95 -backend: - class: Stackdriver - method: good_bad_ratio - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_good: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_count" - resource.type="gae_app" - ( metric.labels.response_code = 429 OR - metric.labels.response_code = 200 OR - metric.labels.response_code = 201 OR - metric.labels.response_code = 202 OR - metric.labels.response_code = 203 OR - metric.labels.response_code = 204 OR - metric.labels.response_code = 205 OR - metric.labels.response_code = 206 OR - metric.labels.response_code = 207 OR - metric.labels.response_code = 208 OR - metric.labels.response_code = 226 OR - metric.labels.response_code = 304 ) - filter_valid: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_count" -exporters: -- class: Stackdriver - project_id: ${STACKDRIVER_HOST_PROJECT_ID} diff --git a/samples/stackdriver/slo_lb_request_availability.yaml b/samples/stackdriver/slo_lb_request_availability.yaml deleted file mode 100644 index 000d48c2..00000000 --- a/samples/stackdriver/slo_lb_request_availability.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -service_name: lb -feature_name: request -slo_description: Availability of HTTP Load Balancer -slo_name: availability -slo_target: 0.98 -backend: - class: Stackdriver - method: good_bad_ratio - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_good: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/request_count" - resource.type="https_lb_rule" - ( metric.label.response_code_class="200" OR - metric.label.response_code_class="300" OR - metric.label.response_code_class="400" ) - filter_valid: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/request_count" - resource.type="https_lb_rule" -exporters: -- class: Stackdriver - project_id: ${STACKDRIVER_HOST_PROJECT_ID} diff --git a/samples/stackdriver/slo_lb_request_latency.yaml b/samples/stackdriver/slo_lb_request_latency.yaml deleted file mode 100644 index 1216825b..00000000 --- a/samples/stackdriver/slo_lb_request_latency.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: lb -feature_name: request -slo_description: Latency of HTTP Load Balancer < 724ms -slo_name: latency724ms -slo_target: 0.98 -backend: - class: Stackdriver - method: distribution_cut - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_valid: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/total_latencies" - resource.type="https_lb_rule" - ( metric.label.response_code_class="200" OR - metric.label.response_code_class="300" OR - metric.label.response_code_class="400" ) - good_below_threshold: true - threshold_bucket: 19 -exporters: -- class: Stackdriver - project_id: ${STACKDRIVER_HOST_PROJECT_ID} diff --git a/samples/stackdriver/slo_pubsub_subscription_throughput.yaml b/samples/stackdriver/slo_pubsub_subscription_throughput.yaml deleted file mode 100644 index 08d57bd7..00000000 --- a/samples/stackdriver/slo_pubsub_subscription_throughput.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: pubsub -feature_name: subscription -slo_description: Throughput of Pub/Sub subscription -slo_name: throughput -slo_target: 0.95 -backend: - class: Stackdriver - project_id: "${STACKDRIVER_HOST_PROJECT_ID}" - method: good_bad_ratio - measurement: - filter_good: > - project="${PUBSUB_PROJECT_ID}" - metric.type="pubsub.googleapis.com/subscription/ack_message_count" - resource.type="pubsub_subscription" - filter_bad: > - project="${PUBSUB_PROJECT_ID}" - metric.type="pubsub.googleapis.com/subscription/num_outstanding_messages" - resource.type="pubsub_subscription" -exporters: -- class: Stackdriver - project_id: ${STACKDRIVER_HOST_PROJECT_ID} diff --git a/samples/stackdriver_service_monitoring/slo_gae_app_availability.yaml b/samples/stackdriver_service_monitoring/slo_gae_app_availability.yaml deleted file mode 100644 index 8ee66be7..00000000 --- a/samples/stackdriver_service_monitoring/slo_gae_app_availability.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gae -feature_name: app -slo_description: Availability of App Engine app -slo_name: availability -slo_target: 0.95 -backend: - class: StackdriverServiceMonitoring - method: good_bad_ratio - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_good: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_count" - resource.type="gae_app" - ( metric.labels.response_code = 429 OR - metric.labels.response_code = 200 OR - metric.labels.response_code = 201 OR - metric.labels.response_code = 202 OR - metric.labels.response_code = 203 OR - metric.labels.response_code = 204 OR - metric.labels.response_code = 205 OR - metric.labels.response_code = 206 OR - metric.labels.response_code = 207 OR - metric.labels.response_code = 208 OR - metric.labels.response_code = 226 OR - metric.labels.response_code = 304 ) - filter_valid: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_count" - resource.type="gae_app" diff --git a/samples/stackdriver_service_monitoring/slo_gae_app_availability_basic.yaml b/samples/stackdriver_service_monitoring/slo_gae_app_availability_basic.yaml deleted file mode 100644 index 2a43c4eb..00000000 --- a/samples/stackdriver_service_monitoring/slo_gae_app_availability_basic.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gae -feature_name: app -slo_description: Availability of App Engine app -slo_name: availability -slo_target: 0.98 -backend: - class: StackdriverServiceMonitoring - method: basic - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - app_engine: - project_id: ${GAE_PROJECT_ID} - module_id: ${GAE_MODULE_ID} - availability: {} diff --git a/samples/stackdriver_service_monitoring/slo_gae_app_latency.yaml b/samples/stackdriver_service_monitoring/slo_gae_app_latency.yaml deleted file mode 100644 index 777e386a..00000000 --- a/samples/stackdriver_service_monitoring/slo_gae_app_latency.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gae -feature_name: app -slo_description: Latency of App Engine app requests < 724ms -slo_name: latency724ms -slo_target: 0.999 -backend: - class: StackdriverServiceMonitoring - method: distribution_cut - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_valid: > - project=${GAE_PROJECT_ID} - metric.type="appengine.googleapis.com/http/server/response_latencies" - resource.type="gae_app" - metric.labels.response_code >= 200 - metric.labels.response_code < 500 - range_min: 0 - range_max: 724 diff --git a/samples/stackdriver_service_monitoring/slo_gae_app_latency_basic.yaml b/samples/stackdriver_service_monitoring/slo_gae_app_latency_basic.yaml deleted file mode 100644 index 3d91dffb..00000000 --- a/samples/stackdriver_service_monitoring/slo_gae_app_latency_basic.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gae -feature_name: app -slo_description: Latency of App Engine app requests < 724ms -slo_name: latency724ms -slo_target: 0.999 -backend: - class: StackdriverServiceMonitoring - method: basic - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - app_engine: - project_id: ${GAE_PROJECT_ID} - module_id: ${GAE_MODULE_ID} - latency: - threshold: 724 # ms diff --git a/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic.yaml b/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic.yaml deleted file mode 100644 index ad4dd6ee..00000000 --- a/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gke -feature_name: service -slo_description: Availability of GKE service -slo_name: availability -slo_target: 0.98 -backend: - class: StackdriverServiceMonitoring - method: basic - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - mesh_istio: - mesh_uid: ${GKE_MESH_UID} - service_namespace: ${GKE_SERVICE_NAMESPACE} - service_name: ${GKE_SERVICE_NAME} - availability: {} diff --git a/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml b/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml deleted file mode 100644 index b4ab973f..00000000 --- a/samples/stackdriver_service_monitoring/slo_gke_app_availability_basic_deprecated.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gke -feature_name: service -slo_description: Availability of GKE service -slo_name: availability -slo_target: 0.98 -backend: - class: StackdriverServiceMonitoring - method: basic - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - cluster_istio: - project_id: ${GKE_PROJECT_ID} - location: ${GKE_LOCATION} - cluster_name: ${GKE_CLUSTER_NAME} - service_namespace: ${GKE_SERVICE_NAMESPACE} - service_name: ${GKE_SERVICE_NAME} - availability: {} diff --git a/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic.yaml b/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic.yaml deleted file mode 100644 index c2de6af2..00000000 --- a/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gke -feature_name: service -slo_description: Latency of GKE service requests < 724ms -slo_name: latency724ms -slo_target: 0.999 -backend: - class: StackdriverServiceMonitoring - method: basic - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - mesh_istio: - mesh_uid: ${GKE_MESH_UID} - service_namespace: ${GKE_SERVICE_NAMESPACE} - service_name: ${GKE_SERVICE_NAME} - latency: - threshold: 724 # ms diff --git a/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml b/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml deleted file mode 100644 index e7643909..00000000 --- a/samples/stackdriver_service_monitoring/slo_gke_app_latency_basic_deprecated.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: gke -feature_name: service -slo_description: Latency of GKE service requests < 724ms -slo_name: latency724ms -slo_target: 0.999 -backend: - class: StackdriverServiceMonitoring - method: basic - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - cluster_istio: - project_id: ${GKE_PROJECT_ID} - location: ${GKE_LOCATION} - cluster_name: ${GKE_CLUSTER_NAME} - service_namespace: ${GKE_SERVICE_NAMESPACE} - service_name: ${GKE_SERVICE_NAME} - latency: - threshold: 724 # ms diff --git a/samples/stackdriver_service_monitoring/slo_lb_request_availability.yaml b/samples/stackdriver_service_monitoring/slo_lb_request_availability.yaml deleted file mode 100644 index 428d411b..00000000 --- a/samples/stackdriver_service_monitoring/slo_lb_request_availability.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: lb -feature_name: request -slo_description: Availability of HTTP Load Balancer -slo_name: availability -slo_target: 0.98 -backend: - class: StackdriverServiceMonitoring - method: good_bad_ratio - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_good: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/request_count" - resource.type="https_lb_rule" - ( metric.label.response_code_class="200" OR - metric.label.response_code_class="300" OR - metric.label.response_code_class="400" ) - filter_valid: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/request_count" - resource.type="https_lb_rule" diff --git a/samples/stackdriver_service_monitoring/slo_lb_request_latency.yaml b/samples/stackdriver_service_monitoring/slo_lb_request_latency.yaml deleted file mode 100644 index 3e19d8e6..00000000 --- a/samples/stackdriver_service_monitoring/slo_lb_request_latency.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -service_name: lb -feature_name: request -slo_description: Latency of HTTP Load Balancer < 724ms -slo_name: latency724ms -slo_target: 0.98 -backend: - class: StackdriverServiceMonitoring - method: distribution_cut - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_valid: > - project=${LB_PROJECT_ID} - metric.type="loadbalancing.googleapis.com/https/total_latencies" - resource.type="https_lb_rule" - ( metric.label.response_code_class="200" OR - metric.label.response_code_class="300" OR - metric.label.response_code_class="400" ) - range_min: 0 - range_max: 724 # ms diff --git a/samples/stackdriver_service_monitoring/zzz_slo_pubsub_subscription_throughput.yaml b/samples/stackdriver_service_monitoring/zzz_slo_pubsub_subscription_throughput.yaml deleted file mode 100644 index cb678d3b..00000000 --- a/samples/stackdriver_service_monitoring/zzz_slo_pubsub_subscription_throughput.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2019 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# TODO: Doesn't work at the moment because Stackdriver Service Monitoring API -# does not support Gauge-type metrics. 
- -# --- -# service_name: pubsub -# feature_name: subscription -# slo_description: Throughput of Pub/Sub subscription -# slo_name: throughput -# slo_target: 0.95 -# backend: -# class: StackdriverServiceMonitoring -# project_id: "${STACKDRIVER_HOST_PROJECT_ID}" -# method: good_bad_ratio -# measurement: -# filter_good: > -# project="${PUBSUB_PROJECT_ID}" -# metric.type="pubsub.googleapis.com/subscription/ack_message_count" -# resource.type="pubsub_subscription" -# filter_bad: > -# project="${PUBSUB_PROJECT_ID}" -# metric.type="pubsub.googleapis.com/subscription/num_outstanding_messages" -# resource.type="pubsub_subscription" From 8da3b3990a053abb4c22f698894ab40aa45c79ed Mon Sep 17 00:00:00 2001 From: Olivier Cervello Date: Mon, 31 May 2021 18:45:58 +0200 Subject: [PATCH 8/8] Migrate app latency --- .../cloud_monitoring/slo_gae_app_latency.yaml | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/samples/cloud_monitoring/slo_gae_app_latency.yaml b/samples/cloud_monitoring/slo_gae_app_latency.yaml index fdbcfbed..13d1022f 100644 --- a/samples/cloud_monitoring/slo_gae_app_latency.yaml +++ b/samples/cloud_monitoring/slo_gae_app_latency.yaml @@ -12,24 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. 
--- -service_name: gae -feature_name: app -slo_description: Latency of App Engine app requests < 724ms -slo_name: latency724ms -slo_target: 0.999 -backend: - class: Stackdriver - method: distribution_cut - project_id: ${STACKDRIVER_HOST_PROJECT_ID} - measurement: - filter_valid: > +apiVersion: sre.google.com/v2 +kind: ServiceLevelObjective +metadata: + name: gae-app-latency724ms + labels: + service_name: gae + feature_name: app + slo_name: latency724ms +spec: + description: Latency of App Engine app requests < 724ms + backend: cloud_monitoring + method: distribution_cut + exporters: + - cloud_monitoring + service_level_indicator: + filter_valid: > project=${GAE_PROJECT_ID} metric.type="appengine.googleapis.com/http/server/response_latencies" resource.type="gae_app" metric.labels.response_code >= 200 metric.labels.response_code < 500 good_below_threshold: true - threshold_bucket: 19 -exporters: -- class: Stackdriver - project_id: ${STACKDRIVER_HOST_PROJECT_ID} + threshold_bucket: 19 + goal: 0.999