diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml
index 9449a95d..a9f9f332 100644
--- a/resources/prometheus/prometheus-rules.yaml
+++ b/resources/prometheus/prometheus-rules.yaml
@@ -766,3 +766,27 @@ for the cluster autoscaler. Limits can be adjusted by modifying the cluster auto
             description: |
               A cluster node logged {{ $value }} SELinux AVC denial(s) per minute to the audit log.
             sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-043-selinux-violation.md"
+        # Sustained variant: the denial count stays above zero for 10 minutes.
+        - alert: ClusterAuditNetworkPolicyViolations
+          expr: |
+            network_policy_denials_sample_count > 0
+          for: 10m
+          labels:
+            severity: info
+          annotations:
+            summary: "Network Policy Violations occurring on cluster."
+            description: |
+              A cluster node logged Network Policy ACL denial(s) for 10 minutes.
+            sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-044-network-policy-violation.md"
+        # Burst variant: the denial count is at least 15 for 1 minute.
+        - alert: ClusterAuditNetworkPolicyViolations
+          expr: |
+            network_policy_denials_sample_count >= 15
+          for: 1m
+          labels:
+            severity: info
+          annotations:
+            summary: "Network Policy Violations occurring on cluster."
+            description: |
+              A cluster node logged at least {{ $value }} Network Policy ACL denial(s) per minute.
+            sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-044-network-policy-violation.md"
diff --git a/resources/prometheus/unit_tests/ClusterAuditNetworkPolicyViolations.yaml b/resources/prometheus/unit_tests/ClusterAuditNetworkPolicyViolations.yaml
new file mode 100644
index 00000000..18825ca3
--- /dev/null
+++ b/resources/prometheus/unit_tests/ClusterAuditNetworkPolicyViolations.yaml
@@ -0,0 +1,63 @@
+rule_files:
+  - /tmp/prometheus-rules-test.yaml
+
+evaluation_interval: 1m
+
+tests:
+  # A count of 15 held for one minute fires the burst (>= 15) variant.
+  - interval: 1m
+    input_series:
+      - series: network_policy_denials_sample_count{namespace="rhacs-cloudwatch"}
+        values: "15x1"
+    alert_rule_test:
+      - eval_time: 70s
+        alertname: ClusterAuditNetworkPolicyViolations
+        exp_alerts:
+          - exp_labels:
+              alertname: ClusterAuditNetworkPolicyViolations
+              namespace: rhacs-cloudwatch
+              severity: info
+            exp_annotations:
+              summary: "Network Policy Violations occurring on cluster."
+              description: |
+                A cluster node logged at least 15 Network Policy ACL denial(s) per minute.
+              sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-044-network-policy-violation.md"
+
+  # A count of 1 held for ten minutes fires the sustained (> 0) variant.
+  - interval: 1m
+    input_series:
+      - series: network_policy_denials_sample_count{namespace="rhacs-cloudwatch"}
+        values: "1x10"
+    alert_rule_test:
+      - eval_time: 610s
+        alertname: ClusterAuditNetworkPolicyViolations
+        exp_alerts:
+          - exp_labels:
+              alertname: ClusterAuditNetworkPolicyViolations
+              namespace: rhacs-cloudwatch
+              severity: info
+            exp_annotations:
+              summary: "Network Policy Violations occurring on cluster."
+              description: |
+                A cluster node logged Network Policy ACL denial(s) for 10 minutes.
+              sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-044-network-policy-violation.md"
+
+  # The count drops to 0 in the final sample, so no alert is expected at 10m.
+  - interval: 1m
+    input_series:
+      - series: network_policy_denials_sample_count{namespace="rhacs-cloudwatch"}
+        values: "1x9 0"
+    alert_rule_test:
+      - eval_time: 10m
+        alertname: ClusterAuditNetworkPolicyViolations
+        exp_alerts: []
+
+  # A count of 14 is below the burst threshold of 15, so no alert is expected.
+  - interval: 1m
+    input_series:
+      - series: network_policy_denials_sample_count{namespace="rhacs-cloudwatch"}
+        values: "14x1"
+    alert_rule_test:
+      - eval_time: 70s
+        alertname: ClusterAuditNetworkPolicyViolations
+        exp_alerts: []