diff --git a/helm/prometheus-rules/templates/platform/shield/alerting-rules/trivy.rules.yml b/helm/prometheus-rules/templates/platform/shield/alerting-rules/general.rules.yml
similarity index 70%
rename from helm/prometheus-rules/templates/platform/shield/alerting-rules/trivy.rules.yml
rename to helm/prometheus-rules/templates/platform/shield/alerting-rules/general.rules.yml
index 8a087d46..f0fb1ff7 100644
--- a/helm/prometheus-rules/templates/platform/shield/alerting-rules/trivy.rules.yml
+++ b/helm/prometheus-rules/templates/platform/shield/alerting-rules/general.rules.yml
@@ -10,11 +10,14 @@ spec:
   groups:
   - name: trivy
     rules:
-    - alert: TrivyComponentRestartingTooOften
+    - alert: ShieldComponentRestartingTooOften
       annotations:
-        description: 'Trivy pod {{ $labels.namespace }}/{{ $labels.pod }} is restarting too often'
-        opsrecipe: trivy-pod-failing
-      expr: increase(kube_pod_container_status_restarts_total{cluster_type="workload_cluster", pod=~"trivy-.*"}[1h]) > 5
+        # Backtick-quoted so Helm passes the Prometheus template through verbatim.
+        description: '{{`Pod {{ $labels.namespace }}/{{ $labels.pod }} is restarting too often.`}}'
+        opsrecipe: shield-pod-failing
+      # PromQL regex matchers are fully anchored, so every component prefix
+      # needs an explicit `.*` suffix to match its generated pod names.
+      expr: increase(kube_pod_container_status_restarts_total{cluster_type="workload_cluster", pod=~"trivy-.*|kyverno-.*|falco-.*"}[1h]) > 5
       for: 30m
       labels:
         area: platform
@@ -24,4 +27,4 @@ spec:
         cancel_if_outside_working_hours: {{ include "workingHoursOnly" . }}
         severity: notify
         team: shield
-        topic: trivy
+        topic: security
diff --git a/test/tests/providers/global/platform/shield/alerting-rules/general.rules.test.yml b/test/tests/providers/global/platform/shield/alerting-rules/general.rules.test.yml
new file mode 100644
index 00000000..dbcfc2f5
--- /dev/null
+++ b/test/tests/providers/global/platform/shield/alerting-rules/general.rules.test.yml
@@ -0,0 +1,48 @@
+---
+rule_files:
+  - general.rules.yml
+tests:
+  - interval: 1m
+    input_series:
+      # Restart counter of a Trivy pod. It grows by one every minute and has
+      # samples past eval_time, so the 1h increase stays above the threshold.
+      - series: 'kube_pod_container_status_restarts_total{app="kube-state-metrics", cluster_id="golem", cluster_type="workload_cluster", container="main", customer="giantswarm", endpoint="http", installation="golem", instance="100.94.2.68:8080", job="kube-state-metrics", namespace="security", node="master-0001", organization="giantswarm", pipeline="stable", pod="trivy-0", provider="capa", region="eu-west-1", service="kube-prometheus-stack-kube-state-metrics", service_priority="medium", uid="a38e8606-416d-4d6c-a7ea-2745078de330"}'
+        values: "1+1x100"
+    alert_rule_test:
+      # Trivy pod
+      - alertname: ShieldComponentRestartingTooOften
+        eval_time: 90m
+        exp_alerts:
+          - exp_labels:
+              # Labels set by the alerting rule.
+              area: platform
+              severity: notify
+              team: shield
+              topic: security
+              cancel_if_cluster_status_creating: "true"
+              cancel_if_cluster_status_deleting: "true"
+              cancel_if_cluster_status_updating: "true"
+              cancel_if_outside_working_hours: "false"
+              # Labels carried over unchanged from the input series.
+              app: kube-state-metrics
+              cluster_id: golem
+              cluster_type: workload_cluster
+              container: main
+              customer: giantswarm
+              endpoint: http
+              installation: golem
+              instance: "100.94.2.68:8080"
+              job: kube-state-metrics
+              namespace: security
+              node: master-0001
+              organization: giantswarm
+              pipeline: stable
+              pod: trivy-0
+              provider: capa
+              region: eu-west-1
+              service: kube-prometheus-stack-kube-state-metrics
+              service_priority: medium
+              uid: a38e8606-416d-4d6c-a7ea-2745078de330
+            exp_annotations:
+              description: '{{`Pod security/trivy-0 is restarting too often.`}}'
+              opsrecipe: shield-pod-failing