diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml
index b2f33f5..6521231 100644
--- a/resources/prometheus/prometheus-rules.yaml
+++ b/resources/prometheus/prometheus-rules.yaml
@@ -128,6 +128,27 @@ spec:
             summary: "Fleetshard synchronizer container `{{ $labels.pod }}/{{ $labels.container }}` in namespace `{{ $labels.namespace }}` restarted more than 3 times."
             description: "Fleetshard synchronizer container `{{ $labels.pod }}/{{ $labels.container }}` in namespace `{{ $labels.namespace }}` has restarted more than 3 times during the last 30 minutes."
             sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-005-fleetshard-sync-unavailable.md"
+        # Fires once the certificate has at most one day of validity left and
+        # keeps firing after expiry. The comparison has no `bool` modifier, so
+        # $value is the expiration timestamp that humanizeTimestamp renders.
+        - alert: RHACSFleetshardCertificateExpiryCritical
+          expr: |
+            acs_fleetshard_certificate_expiration_timestamp <= 1 * 24 * 60 * 60 + time()
+          labels:
+            severity: critical
+          annotations:
+            summary: "Certificate expiring very soon: `{{ $labels.exported_namespace }}/{{ $labels.secret }}/{{ $labels.data_key }}`."
+            description: "Certificate `{{ $labels.exported_namespace }}/{{ $labels.secret }}/{{ $labels.data_key }}` expires on {{ humanizeTimestamp $value }}."
+        # Same check with a seven-day horizon. There is no lower bound, so this
+        # alert is also active whenever the critical one is.
+        - alert: RHACSFleetshardCertificateExpiryWarning
+          expr: |
+            acs_fleetshard_certificate_expiration_timestamp <= 7 * 24 * 60 * 60 + time()
+          labels:
+            severity: warning
+          annotations:
+            summary: "Certificate expiring soon: `{{ $labels.exported_namespace }}/{{ $labels.secret }}/{{ $labels.data_key }}`."
+            description: "Certificate `{{ $labels.exported_namespace }}/{{ $labels.secret }}/{{ $labels.data_key }}` expires on {{ humanizeTimestamp $value }}."
         - alert: RHACSFleetshardSyncReconciliationErrors
           expr: |
             acs_fleetshard_central_errors_per_reconciliations:ratio_rate10m > 0.10
diff --git a/resources/prometheus/unit_tests/RHACSFleetshardCertificateExpiring.yaml b/resources/prometheus/unit_tests/RHACSFleetshardCertificateExpiring.yaml
new file mode 100644
index 0000000..57516c5
--- /dev/null
+++ b/resources/prometheus/unit_tests/RHACSFleetshardCertificateExpiring.yaml
@@ -0,0 +1,62 @@
+# Prometheus rule unit tests for the RHACSFleetshardCertificateExpiryWarning and
+# RHACSFleetshardCertificateExpiryCritical alerts.
+rule_files:
+  - /tmp/prometheus-rules-test.yaml
+
+evaluation_interval: 1m
+
+tests:
+  - interval: 1d
+    input_series:
+      # Constant expiration timestamp: 691200 s = 8 * 86400 s, i.e. 8 days.
+      - series: acs_fleetshard_certificate_expiration_timestamp{exported_namespace="rhacs-00000000000000000000", secret="secret", data_key="key"}
+        values: "691200+0x15"
+
+    alert_rule_test:
+      # 8 days of validity left: outside the 7-day warning window.
+      - eval_time: 0
+        alertname: RHACSFleetshardCertificateExpiryWarning
+        exp_alerts: []
+      # 5 days left: inside the warning window.
+      - eval_time: 3d
+        alertname: RHACSFleetshardCertificateExpiryWarning
+        exp_alerts:
+          - exp_labels:
+              alertname: RHACSFleetshardCertificateExpiryWarning
+              exported_namespace: rhacs-00000000000000000000
+              secret: secret
+              data_key: key
+              severity: warning
+            exp_annotations:
+              summary: "Certificate expiring soon: `rhacs-00000000000000000000/secret/key`."
+              description: "Certificate `rhacs-00000000000000000000/secret/key` expires on 1970-01-09 00:00:00 +0000 UTC."
+      # 2 days left: still above the 1-day critical threshold, so no alert.
+      - eval_time: 6d
+        alertname: RHACSFleetshardCertificateExpiryCritical
+        exp_alerts: []
+      # Exactly 1 day left: the threshold is inclusive (`<=`), so critical fires.
+      - eval_time: 7d
+        alertname: RHACSFleetshardCertificateExpiryCritical
+        exp_alerts:
+          - exp_labels:
+              alertname: RHACSFleetshardCertificateExpiryCritical
+              exported_namespace: rhacs-00000000000000000000
+              secret: secret
+              data_key: key
+              severity: critical
+            exp_annotations:
+              summary: "Certificate expiring very soon: `rhacs-00000000000000000000/secret/key`."
+              description: "Certificate `rhacs-00000000000000000000/secret/key` expires on 1970-01-09 00:00:00 +0000 UTC."
+      # 2 days after expiry: critical keeps firing.
+      - eval_time: 10d
+        alertname: RHACSFleetshardCertificateExpiryCritical
+        exp_alerts:
+          - exp_labels:
+              alertname: RHACSFleetshardCertificateExpiryCritical
+              exported_namespace: rhacs-00000000000000000000
+              secret: secret
+              data_key: key
+              severity: critical
+            exp_annotations:
+              summary: "Certificate expiring very soon: `rhacs-00000000000000000000/secret/key`."
+              description: "Certificate `rhacs-00000000000000000000/secret/key` expires on 1970-01-09 00:00:00 +0000 UTC."