Skip to content

Commit

Permalink
Merge pull request 'added support for monitoring oosync apps of argoc…
Browse files Browse the repository at this point in the history
…d, for now only kubeaid apps' (#353) from argocd_app_oosync into master

Reviewed-on: https://gitea.obmondo.com/EnableIT/KubeAid/pulls/353
  • Loading branch information
ashish1099 committed Aug 18, 2024
2 parents 9f9428b + 730161c commit 5e984f3
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 6 deletions.
104 changes: 102 additions & 2 deletions build/kube-prometheus/common-template.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,76 @@ local default_vars = {
'cert-manager',
'traefik',
],

// Extra Argo CD application names to monitor in addition to `kubeaid_apps`.
// Defaults to an empty list here; it is concatenated with `kubeaid_apps` where the
// `kubeaidManagedApps` recording rules are generated.
kubeaid_users_apps+: [],
// Application names shipped with KubeAid. Each entry becomes the `name` label of one
// `kubeaidManagedApps` recording rule, which the Argo CD alerts join against on `name`.
// Keep the list in alphabetical order.
kubeaid_apps+: [
'argo-cd',
'argocd-image-updater',
'aws-ebs-csi-driver',
'aws-efs-csi-driver',
'capi-cluster',
'ccm-hetzner',
'cerebro',
'cert-manager',
'cilium',
'circleci-runner',
'cloudnative-pg',
'cluster-api',
'cluster-autoscaler',
'crossplane',
'dokuwiki',
'errbot',
'external-dns',
'filebeat',
'fluent-bit',
'gatekeeper',
'gitea-runner',
'gitlab-runner',
'grafana-operator',
'graylog',
'haproxy',
'harbor',
'k8id-custom-azure',
'k8s-event-logger',
'keda',
'keycloakx',
'kube2iam',
'kubernetes-dashboard',
'mail',
'mariadb-operator',
'matomo',
'mattermost-team-edition',
'metallb',
'metrics-server',
'mongodb-operator',
'obmondo-k8s-agent',
'oncall',
'opencost',
'opensearch',
'opensearch-dashboards',
'postgres-operator',
'prometheus-adapter',
'prometheus-linuxaid',
'puppetserver',
'rabbitmq-operator',
'redis-operator',
'redmine',
'relate',
'reloader',
'rook-ceph',
'sealed-secrets',
'snapshot-controller',
'sonarqube',
'strimzi-kafka-operator',
'teleport-cluster',
'teleport-kube-agent',
'tigera-operator',
'traefik',
'traefik-forward-auth',
'velero',
'whoami',
'yetibot',
'zfs-localpv',
],
prometheus_operator_resources: {
limits: { memory: '80Mi' },
requests: { cpu: '20m', memory: '80Mi' },
Expand Down Expand Up @@ -207,7 +276,37 @@ local kp =
import 'kube-prometheus/addons/custom-metrics.libsonnet'
) else {}
) +

{
  // Names of every application that should get a `kubeaidManagedApps` series:
  // the built-in KubeAid list followed by the user-supplied list.
  local managedAppNames = vars.kubeaid_apps + vars.kubeaid_users_apps,

  // Builds the recording rule for a single application name.
  local managedAppRule(appName) = {
    record: 'kubeaidManagedApps',
    // Constant expression: every recorded series has the value 0.
    expr: 0,
    labels: {
      name: appName,
      // TODO: maybe add support for other projects
      project: 'default',
    },
  },

  // PrometheusRule holding one recording rule per managed application.
  argocdApplications+: {
    prometheusRuleExample+: {
      apiVersion: 'monitoring.coreos.com/v1',
      kind: 'PrometheusRule',
      metadata: {
        name: 'kubeaid-managed-apps',
        namespace: $.values.common.namespace,
      },
      spec: {
        groups: [
          {
            name: 'kubeaidManagedApps',
            rules: std.map(managedAppRule, managedAppNames),
          },
        ],
      },
    },
  },
} +
{
grafana+: {
networkPolicy+: {
Expand Down Expand Up @@ -679,6 +778,7 @@ local kp =
} } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
// Expose every object under kp.argocdApplications as a top-level entry whose key is
// 'argocd-application-prometheus-rules' followed directly by the field name.
// NOTE(review): unlike the 'node-exporter-' and 'prometheus-' prefixes used for the
// neighbouring entries, this prefix has no trailing '-' — confirm that is intended.
{
  ['argocd-application-prometheus-rules' + ruleField]: kp.argocdApplications[ruleField]
  for ruleField in std.objectFields(kp.argocdApplications)
} +
(
// Need to figure out elseif
// if vars != 'gke' || vars != 'azure' didn't work
Expand Down
8 changes: 4 additions & 4 deletions build/kube-prometheus/mixins/argo-cd/mixin.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,23 @@
// Inspiration from here https://github.com/adinhodovic/argo-cd-mixin/blob/main/alerts/alerts.libsonnet
{
  // Fires when at least one monitored Argo CD application has stayed out of sync
  // for the whole `for` window.
  alert: 'ArgoCdAppOutOfSync',
  // Inner sum: one series per application whose sync_status is not "Synced".
  // `+ on (name) group_left kubeaidManagedApps` acts as a filter: applications with no
  // matching `kubeaidManagedApps` series for their `name` are dropped from the result.
  // Outer count: number of remaining applications per project and sync status.
  expr: 'count by (project, sync_status) ((sum by (name, job, dest_server, project, sync_status) (argocd_app_info{job=~".*",sync_status!="Synced"}) >= 1) + on (name) group_left kubeaidManagedApps)',
  labels: {
    severity: 'warning',
  },
  // Quoted because `for` is a Jsonnet keyword.
  'for': '2h',
  annotations: {
    summary: 'ArgoCD Application is Out Of Sync.',
    // Keep the duration in this text in step with the `for` value above.
    description: 'Multiple application under {{ .Labels.project }} is out of sync with the sync status {{ .Labels.sync_status }} for the past 2h',
  },
},
{
alert: 'ArgoCdAppUnhealthy',
expr: 'sum by (job, project, dest_server, health_status) (argocd_app_info{health_status!~"Healthy|Progressing"}) >= 1',
expr: 'count by (health_status,project) ((sum by (name, job, dest_server, project, health_status) (argocd_app_info{health_status!~"Healthy|Progressing"}) >= 1) + on (name) group_left kubeaidManagedApps)',
labels: {
severity: 'warning',
},
'for': '15m',
'for': '2h',
annotations: {
summary: 'ArgoCD Application is not healthy.',
description: 'Multiple application under {{ .Labels.project }} is not healthy with the health status {{ .Labels.health_status }} for the past 15m',
Expand Down

0 comments on commit 5e984f3

Please sign in to comment.