Skip to content
This repository has been archived by the owner on Dec 4, 2024. It is now read-only.

Commit

Permalink
prometheus: bump prometheus-operator (#96)
Browse files Browse the repository at this point in the history
  • Loading branch information
branden authored and shaneutt committed Jan 24, 2020
1 parent 0159088 commit e1b34f6
Showing 1 changed file with 318 additions and 0 deletions.
318 changes: 318 additions & 0 deletions addons/prometheus/0.34.x/prometheus-2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,318 @@
---
apiVersion: kubeaddons.mesosphere.io/v1beta1
kind: Addon
metadata:
name: prometheus
namespace: kubeaddons
labels:
kubeaddons.mesosphere.io/name: prometheus
# TODO: we're temporarily supporting dependency on an existing default storage class
# on the cluster, this hack will trigger re-queue on Addons until one exists.
kubeaddons.mesosphere.io/hack-requires-defaultstorageclass: "true"
annotations:
catalog.kubeaddons.mesosphere.io/addon-revision: "0.34.0-2"
appversion.kubeaddons.mesosphere.io/prometheus-operator: "0.34.0"
appversion.kubeaddons.mesosphere.io/prometheus: "2.14.0"
appversion.kubeaddons.mesosphere.io/alertmanager: "0.19.0"
appversion.kubeaddons.mesosphere.io/grafana: "6.4.2"
endpoint.kubeaddons.mesosphere.io/prometheus: "/ops/portal/prometheus"
endpoint.kubeaddons.mesosphere.io/alertmanager: "/ops/portal/alertmanager"
endpoint.kubeaddons.mesosphere.io/grafana: "/ops/portal/grafana"
docs.kubeaddons.mesosphere.io/prometheus: "https://prometheus.io/docs/introduction/overview/"
docs.kubeaddons.mesosphere.io/grafana: "https://grafana.com/docs/"
docs.kubeaddons.mesosphere.io/alertmanager: "https://prometheus.io/docs/alerting/alertmanager/"
values.chart.helm.kubeaddons.mesosphere.io/prometheus: "https://raw.githubusercontent.com/mesosphere/charts/a370c215c08ca7e50055902177141554de5444e6/staging/prometheus-operator/values.yaml"
helmv2.kubeaddons.mesosphere.io/upgrade-strategy: '[{"upgradeFrom": "<=5.19.7", "strategy": "delete"}]'
spec:
kubernetes:
minSupportedVersion: v1.15.6
cloudProvider:
- name: aws
enabled: true
- name: azure
enabled: true
- name: docker
enabled: false
- name: none
enabled: true
chartReference:
chart: prometheus-operator
repo: https://mesosphere.github.io/charts/staging
version: 8.3.9
values: |
---
defaultRules:
rules:
etcd: false
mesosphereResources:
create: true
rules:
etcd: true
# addon alert rules are defaulted to false to prevent potential misfires if addons
# are disabled.
velero: false
# This option grants the Prometheus pod the necessary permissions that the
# "get-cluster-id" container defined in prometheus.prometheusSpec.containers requires.
# see: https://github.com/mesosphere/kubeaddons-extrasteps/tree/master/pkg/prometheus/prometheus.go
enableAdditionalPrometheusRBACRules: true
prometheus:
service:
additionalPorts:
# Service port for Thanos gRPC.
- name: grpc
port: 10901
targetPort: grpc
additionalServiceMonitors:
- name: kubeaddons-service-monitor-metrics
selector:
matchLabels:
servicemonitor.kubeaddons.mesosphere.io/path: "metrics"
namespaceSelector:
matchNames:
- kubeaddons
- kommander
- velero
endpoints:
- port: metrics
interval: 30s
- port: monitoring
interval: 30s
# Service port for Thanos Querier, running in Kommander.
# If we ever add a Kommander-specific Prometheus, this
# endpoint should be removed and added to that Prometheus's
# configuration.
- targetPort: 10902
interval: 30s
- name: kubeaddons-service-monitor-api-v1-metrics-prometheus
selector:
matchLabels:
servicemonitor.kubeaddons.mesosphere.io/path: "api__v1__metrics__prometheus"
namespaceSelector:
matchNames:
- kubeaddons
endpoints:
- path: /api/v1/metrics/prometheus
port: metrics
interval: 30s
- name: kubeaddons-service-monitor-prometheus-metrics
selector:
matchLabels:
servicemonitor.kubeaddons.mesosphere.io/path: "prometheus__metrics"
namespaceSelector:
matchNames:
- kubeaddons
endpoints:
- path: /_prometheus/metrics
targetPort: 5601
interval: 30s
prometheusSpec:
image:
tag: v2.14.0
thanos:
version: v0.8.1
externalLabels:
cluster: $(CLUSTER_ID)
containers:
# note: in order to run this container, mesosphereResources.enableAdditionalPrometheusRBACRules
# needs to be set to true
- name: get-cluster-id
image: mesosphere/kubeaddons-addon-initializer:v0.1.5
command: ["/bin/bash", "-c", "addon-initializer prometheus && sleep infinity"]
env:
- name: "PROMETHEUS_NAMESPACE"
value: "kubeaddons"
- name: "CLUSTER_ID_CONFIGMAP_NAME"
value: "cluster-info-configmap"
- name: prometheus-config-reloader
envFrom:
- configMapRef:
name: cluster-info-configmap
additionalScrapeConfigs:
- job_name: 'kubernetes-nodes-containerd'
metrics_path: /v1/metrics
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- source_labels: [__address__]
regex: '(.*):10250'
replacement: '${1}:1338'
target_label: __address__
- job_name: 'gpu_metrics'
metrics_path: /gpu/metrics
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- source_labels: [__address__]
regex: '(.*):10250'
replacement: '${1}:9400'
target_label: __address__
- source_labels: [__meta_kubernetes_node_label_konvoy_mesosphere_com_gpu_provider]
regex: NVIDIA
action: keep
- job_name: 'kubernetes-calico-node'
metrics_path: /metrics
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: pod
namespaces:
names:
- kube-system
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_k8s_app]
regex: calico-node
action: keep
- source_labels: [__meta_kubernetes_pod_container_port_name]
regex: .*metrics
action: keep
- source_labels: [__meta_kubernetes_pod_label_k8s_app]
target_label: name
action: replace
- source_labels: [__meta_kubernetes_pod_container_port_name]
target_label: endpoint
action: replace
- source_labels: [__meta_kubernetes_pod_node_name]
target_label: node
action: replace
- source_labels: [__meta_kubernetes_pod_name]
target_label: pod
action: replace
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
action: replace
- job_name: 'kubernetes-keepalived'
metrics_path: /snmp
params:
target: ["127.0.0.1:6161"]
module: ["keepalived"]
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: pod
namespaces:
names:
- kube-system
relabel_configs:
- source_labels: [__meta_kubernetes_pod_container_port_protocol]
regex: TCP
action: keep
- source_labels: [__meta_kubernetes_pod_container_port_number]
regex: "6161"
action: keep
- source_labels: [__meta_kubernetes_pod_container_port_name]
target_label: endpoint
action: replace
- source_labels: [__meta_kubernetes_pod_node_name]
target_label: node
action: replace
- source_labels: [__meta_kubernetes_pod_name]
target_label: pod
action: replace
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
action: replace
enableAdminAPI: true
secrets:
- etcd-certs
externalUrl: "/ops/portal/prometheus"
storageSpec:
volumeClaimTemplate:
metadata:
name: db
spec:
accessModes: ["ReadWriteOnce"]
# 50Gi is the default size for the chart
resources:
requests:
storage: 50Gi
resources:
limits:
cpu: 1000m
memory: 2500Mi
requests:
cpu: 300m
memory: 1500Mi
alertmanager:
alertmanagerSpec:
resources:
limits:
cpu: 100m
memory: 50Mi
requests:
cpu: 10m
memory: 50Mi
grafana:
grafana.ini:
server:
protocol: http
enable_gzip: true
root_url: "%(protocol)s://%(domain)s:%(http_port)s/ops/portal/grafana"
auth.proxy:
enabled: true
header_name: X-Forwarded-User
auto-sign-up: true
auth.basic:
enabled: false
users:
auto_assign_org_role: Admin
service:
type: ClusterIP
port: 3000
resources:
# keep request = limit to keep this container in guaranteed class
limits:
cpu: 300m
memory: 100Mi
requests:
cpu: 200m
memory: 100Mi
readinessProbe:
httpGet:
path: /api/health
port: 3000
scheme: HTTP
livenessProbe:
httpGet:
path: /api/health
port: 3000
scheme: HTTP
initialDelaySeconds: 60
timeoutSeconds: 30
failureThreshold: 10
rbac:
pspUseAppArmor: false
# to avoid needing to download any plugins at runtime, use a container and a shared volume
# do not enable the plugins here, instead rebuild the mesosphere/grafana-plugins image with the new plugins
plugins: []
# - grafana-piechart-panel
extraEmptyDirMounts:
- name: plugins
mountPath: /var/lib/grafana/plugins/
extraInitContainers:
- name: grafana-plugins-install
image: mesosphere/grafana-plugins:v0.0.1
command: ["/bin/sh", "-c", "cp -a /var/lib/grafana/plugins/. /var/lib/grafana/shared-plugins/"]
volumeMounts:
- name: plugins
mountPath: /var/lib/grafana/shared-plugins/
kubeEtcd:
enabled: true
serviceMonitor:
scheme: "https"
caFile: "/etc/prometheus/secrets/etcd-certs/ca.crt"
certFile: "/etc/prometheus/secrets/etcd-certs/server.crt"
keyFile: "/etc/prometheus/secrets/etcd-certs/server.key"
kube-state-metrics:
image:
# override the default k8s.gcr.io/kube-state-metrics repositry
# containerd mirror functionality does not support pulling these images
# TODO remove once https://github.com/containerd/containerd/issues/3756 is resolved
repository: quay.io/coreos/kube-state-metrics

0 comments on commit e1b34f6

Please sign in to comment.