Skip to content

Commit

Permalink
chore(oncall): replace rabbitmq cr with a helmrelease
Browse files Browse the repository at this point in the history
  • Loading branch information
SmaineTF1 committed Nov 29, 2024
1 parent e876e4f commit 6d60ad4
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ spec:
- hubble-${cluster_name}.priv.${domain_name}
- vmalertmanager-${cluster_name}.priv.${domain_name}
- vmagent-${cluster_name}.priv.${domain_name}
- oncall.priv.${domain_name}
- oncall-rabbitmq.priv.${domain_name}
issuerRef:
name: openbao
kind: ClusterIssuer
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/base/gapi/platform-private-gateway.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ spec:
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: instance
service.beta.kubernetes.io/aws-load-balancer-scheme: "internal"
service.beta.kubernetes.io/aws-load-balancer-type: "external"
external-dns.alpha.kubernetes.io/hostname: "harbor.priv.${domain_name},grafana.priv.${domain_name},vm.priv.${domain_name},headlamp.priv.${domain_name},hubble-${cluster_name}.priv.${domain_name},vmalertmanager-${cluster_name}.priv.${domain_name},vmagent-${cluster_name}.priv.${domain_name}"
# Comma-separated private hostnames that external-dns publishes for this gateway (each listed once).
external-dns.alpha.kubernetes.io/hostname: "harbor.priv.${domain_name},grafana.priv.${domain_name},vm.priv.${domain_name},headlamp.priv.${domain_name},hubble-${cluster_name}.priv.${domain_name},vmalertmanager-${cluster_name}.priv.${domain_name},vmagent-${cluster_name}.priv.${domain_name},oncall.priv.${domain_name},oncall-rabbitmq.priv.${domain_name}"
listeners:
- name: http
hostname: "*.priv.${domain_name}"
Expand Down
22 changes: 4 additions & 18 deletions observability/base/grafana-oncall/externalsecret-rabbitmq.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,10 @@ spec:
name: clustersecretstore
refreshInterval: 1h
target:
name: oncall-rabbitmq
creationPolicy: Owner
deletionPolicy: Retain
template:
engineVersion: v2
type: Opaque
data: # We can create kubernetes secrets with the desired format.
default_user.conf: |
default_user = "{{ .username }}"
default_pass = "{{ .password }}"
username: "{{ .username }}"
password: "{{ .password }}"
data:
- secretKey: username
remoteRef:
key: observability/grafana/oncall-rabbitmq
property: username
- secretKey: password
remoteRef:
name: oncall-rabbitmq
dataFrom:
- extract:
conversionStrategy: Default
key: observability/grafana/oncall-rabbitmq
property: password
2 changes: 1 addition & 1 deletion observability/base/grafana-oncall/helmrelease-oncall.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ spec:
externalRedis:
host: oncall-valkey-master
port: 6379
username: user
username: default
existingSecret: "oncall-valkey"
passwordKey: password

Expand Down
116 changes: 116 additions & 0 deletions observability/base/grafana-oncall/helmrelease-rabbitmq.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
---
# Based on https://grafana.com/docs/grafana-oncall/latest/setup/install/helm/install-scalable/
#
# Flux HelmRelease that installs the Bitnami `rabbitmq` chart (pinned to 15.0.3)
# under the release name `oncall-rabbitmq`. Everything below `spec.values` is
# handed to the chart as Helm values.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: oncall-rabbitmq
spec:
  releaseName: oncall-rabbitmq
  driftDetection:
    mode: enabled
  chart:
    spec:
      chart: rabbitmq
      sourceRef:
        kind: HelmRepository
        name: bitnami
        namespace: flux-system
      # Quoted so the version is always treated as a string.
      version: "15.0.3"
  interval: 5m0s
  timeout: 15m
  install:
    remediation:
      retries: 3
  values:
    auth:
      username: oncall
      # Password and Erlang cookie are both read from the `oncall-rabbitmq`
      # Secret; the keys below must exist in that Secret.
      existingPasswordSecret: "oncall-rabbitmq"
      existingSecretPasswordKey: "password"
      existingErlangSecret: "oncall-rabbitmq"
      existingSecretErlangKey: "erlang-cookie-secret"

      # TODO: enable TLS
      # NOTE(review): nested under `auth` on the assumption that this targets
      # the chart's `auth.tls.enabled` value - confirm against the chart.
      tls:
        enabled: false

    replicaCount: 1

    resourcesPreset: "nano"

    persistence:
      storageClass: "gp3"
      size: 8Gi

    ingress:
      enabled: false

    networkPolicy:
      ## To be replaced with cilium network policy
      enabled: true

    metrics:
      enabled: true
      serviceMonitor:
        default:
          enabled: true
        perObject:
          enabled: true
        detailed:
          enabled: true

      # The rule strings below embed Helm template expressions:
      #   {{ template "common.names.fullname" . }} and {{ .Values.replicaCount }}
      #     are meant to be expanded by the chart, while
      #   {{ "{{ $labels.x }}" }} escapes the braces so that a literal
      #     {{ $labels.x }} is left for alert-time templating.
      # NOTE(review): the metric names used here (rabbitmq_up, rabbitmq_running,
      # rabbitmq_partitions, rabbitmq_node_mem_*, rabbitmq_connectionsTotal)
      # should be checked against what the enabled exporter actually exposes.
      prometheusRule:
        enabled: true
        namespace: "observability"
        rules:
          - alert: RabbitmqDown
            expr: 'rabbitmq_up{service="{{ template "common.names.fullname" . }}"} == 0'
            for: 5m
            labels:
              severity: error
            annotations:
              summary: 'Rabbitmq down (instance {{ "{{ $labels.instance }}" }})'
              description: RabbitMQ node down
          - alert: ClusterDown
            expr: |
              sum(rabbitmq_running{service="{{ template "common.names.fullname" . }}"})
              < {{ .Values.replicaCount }}
            for: 5m
            labels:
              severity: error
            annotations:
              summary: 'Cluster down (instance {{ "{{ $labels.instance }}" }})'
              description: |
                Less than {{ .Values.replicaCount }} nodes running in RabbitMQ cluster
                VALUE = {{ "{{ $value }}" }}
          - alert: ClusterPartition
            expr: 'rabbitmq_partitions{service="{{ template "common.names.fullname" . }}"} > 0'
            for: 5m
            labels:
              severity: error
            annotations:
              summary: 'Cluster partition (instance {{ "{{ $labels.instance }}" }})'
              description: |
                Cluster partition
                VALUE = {{ "{{ $value }}" }}
          - alert: OutOfMemory
            expr: |
              rabbitmq_node_mem_used{service="{{ template "common.names.fullname" . }}"}
              / rabbitmq_node_mem_limit{service="{{ template "common.names.fullname" . }}"}
              * 100 > 90
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: 'Out of memory (instance {{ "{{ $labels.instance }}" }})'
              # Real line breaks are used here: inside a literal block scalar
              # a backslash-n sequence is kept verbatim, not turned into a newline.
              description: |
                Memory available for RabbitMQ is low (< 10%)
                VALUE = {{ "{{ $value }}" }}
                LABELS: {{ "{{ $labels }}" }}
          - alert: TooManyConnections
            expr: 'rabbitmq_connectionsTotal{service="{{ template "common.names.fullname" . }}"} > 1000'
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: 'Too many connections (instance {{ "{{ $labels.instance }}" }})'
              # Same as above: one field per line instead of an embedded backslash-n.
              description: |
                RabbitMQ instance has too many connections (> 1000)
                VALUE = {{ "{{ $value }}" }}
                LABELS: {{ "{{ $labels }}" }}
15 changes: 15 additions & 0 deletions observability/base/grafana-oncall/httproute-oncall.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
# HTTPRoute attaching the hostname oncall.priv.${domain_name} to the
# `platform-private` gateway and forwarding its traffic to `oncall-engine`.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: oncall
  namespace: observability
spec:
  # The parent gateway lives in another namespace (`infrastructure`).
  parentRefs:
    - name: platform-private
      namespace: infrastructure
  hostnames:
    - "oncall.priv.${domain_name}"
  # Single rule without matches: every request for the hostname above goes to
  # the backend below.
  rules:
    - backendRefs:
        - name: oncall-engine
          port: 8080
15 changes: 15 additions & 0 deletions observability/base/grafana-oncall/httproute-rabbitmq.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
# HTTPRoute attaching the hostname oncall-rabbitmq.priv.${domain_name} to the
# `platform-private` gateway and forwarding its traffic to `oncall-rabbitmq`.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: oncall-rabbitmq
  namespace: observability
spec:
  # The parent gateway lives in another namespace (`infrastructure`).
  parentRefs:
    - name: platform-private
      namespace: infrastructure
  hostnames:
    - "oncall-rabbitmq.priv.${domain_name}"
  # Single rule without matches: every request for the hostname above goes to
  # the backend below.
  rules:
    - backendRefs:
        - name: oncall-rabbitmq
          # NOTE(review): 15672 is presumably the RabbitMQ management/HTTP
          # port - confirm the backend Service exposes it.
          port: 15672
4 changes: 3 additions & 1 deletion observability/base/grafana-oncall/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ resources:
- externalsecret-sqlinstance-password.yaml
- externalsecret-valkey.yaml
- helmrelease-oncall.yaml
- helmrelease-rabbitmq.yaml
- helmrelease-valkey.yaml
- rabbitmq.yaml
- httproute-oncall.yaml
- httproute-rabbitmq.yaml
- sqlinstance.yaml
9 changes: 0 additions & 9 deletions observability/base/grafana-oncall/rabbitmq.yaml

This file was deleted.

0 comments on commit 6d60ad4

Please sign in to comment.