diff --git a/apply-global-prometheus.sh b/apply-global-prometheus.sh
index ec62229d..fabd2e22 100755
--- a/apply-global-prometheus.sh
+++ b/apply-global-prometheus.sh
@@ -26,15 +26,6 @@ CLUSTER=${CLUSTER:?Please provide cluster name: $USAGE}
 export GRAFANA_DOMAIN=status-${PROJECT}.measurementlab.net
 export ALERTMANAGER_URL=http://status-${PROJECT}.measurementlab.net:9093
 
-# Roles.
-kubectl apply -f "k8s/${PROJECT}/${CLUSTER}/roles"
-
-# Deployent dependencies.
-kubectl apply -f "k8s/${PROJECT}/${CLUSTER}/persistentvolumes"
-
-# Services.
-kubectl apply -f "k8s/${PROJECT}/${CLUSTER}/services"
-
 # Config maps and Secrets
 
 ## Blackbox exporter.
@@ -98,7 +89,36 @@ if [[ -n "${ALERTMANAGER_URL}" ]] ; then
 fi
 
-# Deployments
-kubectl apply -f "k8s/${PROJECT}/${CLUSTER}/deployments"
+# Apply templates.
+if [[ -f "k8s/${CLUSTER}/${PROJECT}.yml" ]] ; then
+  # Render into a private temporary directory instead of a predictable path
+  # under /tmp, keeping the original "<cluster>-<project>.yml" file name.
+  CFG_DIR=$(mktemp -d) || exit 1
+  CFG="${CFG_DIR}/${CLUSTER}-${PROJECT}.yml"
+
+  # Expand the cluster templates using the per-project file. Stop on failure
+  # so that a partially rendered file is never applied to the cluster.
+  if ! kexpand expand --ignore-missing-keys "k8s/${CLUSTER}"/*/*.yml \
+      -f "k8s/${CLUSTER}/${PROJECT}.yml" > "${CFG}" ; then
+    echo "Error: kexpand failed for k8s/${CLUSTER}/${PROJECT}.yml" >&2
+    rm -rf -- "${CFG_DIR}"
+    exit 1
+  fi
+  kubectl apply -f "${CFG}"
+  rm -rf -- "${CFG_DIR}"
+else
+  # TODO: remove when all project files support the new templates.
+  # Roles.
+  kubectl apply -f "k8s/${PROJECT}/${CLUSTER}/roles"
+
+  # Deployment dependencies.
+  kubectl apply -f "k8s/${PROJECT}/${CLUSTER}/persistentvolumes"
+
+  # Services.
+  kubectl apply -f "k8s/${PROJECT}/${CLUSTER}/services"
+
+  # Deployments
+  kubectl apply -f "k8s/${PROJECT}/${CLUSTER}/deployments"
+fi
 
 # Reload configurations. If the deployment configuration has changed then this
 # request may fail becuase the container has already shutdown.
diff --git a/k8s/mlab-oti/prometheus-federation/deployments/prometheus.yml b/k8s/mlab-oti/prometheus-federation/deployments/prometheus.yml deleted file mode 100644 index ab4b4696..00000000 --- a/k8s/mlab-oti/prometheus-federation/deployments/prometheus.yml +++ /dev/null @@ -1,161 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: prometheus-server - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - # Used to match pre-existing pods that may be affected during updates. - run: prometheus-server - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - # Pod template. - template: - metadata: - labels: - # Note: run=prometheus-server should match a service config with a - # public IP and port so that it is publically accessible. - run: prometheus-server - annotations: - # Tell prometheus service discovery to scrape the pod containers. - prometheus.io/scrape: 'true' - spec: - # References a service account with RBAC permissions for accessing node - # metrics. This is required for k8s version 1.6+. - serviceAccountName: prometheus - - # Clusters running a prometheus instance must label nodes exclusively for - # use by prometheus. See README for steps to create a GKE cluster for - # prometheus. - nodeSelector: - prometheus-node: 'true' - - # When prometheus receives SIGTERM, it begins a new checkpoint. This can - # take longer than the default grace period of 30s. - terminationGracePeriodSeconds: 240 - - # Place the pod into the Guaranteed QoS by setting equal resource - # requests and limits for *all* containers in the pod. - # For more background, see: - # https://github.com/kubernetes/community/blob/master/contributors/design-proposals/resource-qos.md - containers: - # Check https://hub.docker.com/r/prom/prometheus/tags/ for the current - # stable version. 
- - image: prom/prometheus:v1.6.2 - # Note: the container name appears to be ignored and the actual pod name - # is derived from the Deployment.metadata.name. However, removing this - # value results in a configuration error. - name: prometheus - # Note: Set retention time to 120 days. (default retention is 30d). - args: ["-config.file=/etc/prometheus/prometheus.yml", - "-storage.local.path=/prometheus", - "-storage.local.retention=2880h", - "-alertmanager.url=http://alertmanager-public-service.default.svc.cluster.local:9093", - "-web.external-url=http://status-mlab-oti.measurementlab.net:9090", - "-web.console.libraries=/usr/share/prometheus/console_libraries", - "-web.console.templates=/usr/share/prometheus/consoles"] - ports: - - containerPort: 9090 - resources: - requests: - memory: "12Gi" - cpu: "6000m" - limits: - memory: "12Gi" - cpu: "6000m" - volumeMounts: - # /prometheus stores all metric data. Declared as VOLUME in base image. - - mountPath: /prometheus - name: prometheus-storage - subPath: prometheus-data - # /legacy-targets should contain legacy target configuration files. - - mountPath: /legacy-targets - name: prometheus-storage - subPath: legacy-targets - # /federation-targets should contain federation target config files. - - mountPath: /federation-targets - name: prometheus-storage - subPath: federation-targets - # /blackbox-targets should contain blackbox target config files. - - mountPath: /blackbox-targets - name: prometheus-storage - subPath: blackbox-targets - # /aeflex-targets should contain AppEngine target config files. - - mountPath: /aeflex-targets - name: prometheus-storage - subPath: aeflex-targets - # /snmp-targets should contain snmp_exporter target config files. - - mountPath: /snmp-targets - name: prometheus-storage - subPath: snmp-targets - # /etc/prometheus/prometheus.yml contains the M-Lab Prometheus config. 
- - mountPath: /etc/prometheus - name: prometheus-config - - # Run a node-exporter as part of the prometheus-server pod so that it has - # access to the same namespace and volumes as the prometheus-server. This - # allows simple disk usage monitoring of the "/prometheus" mount point. - - image: prom/node-exporter:v0.13.0 - name: node-exporter - # Note: only enable the filesystem collector, and ignore system paths. - args: [ "--collectors.enabled=filesystem", - "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($|/)"] - ports: - - containerPort: 9100 - resources: - requests: - memory: "10Mi" - cpu: "50m" - limits: - memory: "10Mi" - cpu: "50m" - volumeMounts: - - mountPath: /prometheus - name: prometheus-storage - - - image: measurementlab/gcp-service-discovery - name: service-discovery - env: - - name: GCLOUD_PROJECT - valueFrom: - configMapKeyRef: - name: prometheus-federation-config - key: gcloud-project - args: [ "--aef-target=/targets/aeflex-targets/aeflex.json", - "--gke-target=/targets/federation-targets/prometheus-clusters.json", - "--http-target=/targets/legacy-targets/sidestream.json", - "--http-target=/targets/blackbox-targets/ssh806.json", - "--http-target=/targets/blackbox-targets/rsyncd.json", - "--http-target=/targets/snmp-targets/snmpexporter.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/legacy-targets/sidestream.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/blackbox-targets/ssh806.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/blackbox-targets/rsyncd.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/snmp-targets/snmpexporter.json", - "--project=$(GCLOUD_PROJECT)"] - resources: - requests: - memory: "150Mi" - cpu: "50m" - limits: - memory: "150Mi" - cpu: "50m" - volumeMounts: - # Mount the the prometheus-storage for write access to the target - # 
directories. - - mountPath: /targets - name: prometheus-storage - - # Disks created manually, can be named here explicitly using - # gcePersistentDisk instead of the persistentVolumeClaim. - volumes: - - name: prometheus-storage - persistentVolumeClaim: - claimName: auto-prometheus-disk0 - - name: prometheus-config - configMap: - name: prometheus-federation-config diff --git a/k8s/mlab-oti/prometheus-federation/persistentvolumes/persistent-volumes.yml b/k8s/mlab-oti/prometheus-federation/persistentvolumes/persistent-volumes.yml deleted file mode 100644 index c4132316..00000000 --- a/k8s/mlab-oti/prometheus-federation/persistentvolumes/persistent-volumes.yml +++ /dev/null @@ -1,64 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: auto-prometheus-disk0 - annotations: - volume.beta.kubernetes.io/storage-class: "slow" -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 400Gi ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: auto-grafana-disk0 - annotations: - volume.beta.kubernetes.io/storage-class: "slow" -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 10Gi ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: auto-alertmanager-disk0 - annotations: - volume.beta.kubernetes.io/storage-class: "slow" -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 10Gi ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: auto-prometheus-ssd0 - annotations: - volume.beta.kubernetes.io/storage-class: "fast" -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 200Gi ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: auto-prometheus-ssd1 - annotations: - volume.beta.kubernetes.io/storage-class: "fast" -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 200Gi diff --git a/k8s/mlab-oti/prometheus-federation/services/alertmanager-public-service.yml 
b/k8s/mlab-oti/prometheus-federation/services/alertmanager-public-service.yml deleted file mode 100644 index e464716f..00000000 --- a/k8s/mlab-oti/prometheus-federation/services/alertmanager-public-service.yml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - annotations: - prometheus.io/scrape: 'true' - prometheus.io/port: '9093' - name: alertmanager-public-service - namespace: default -spec: - ports: - - port: 9093 - protocol: TCP - targetPort: 9093 - selector: - # Pods with labels matching this key/value pair will be publically - # accessible through the service IP and port. - run: alertmanager-server - sessionAffinity: None - # Use the same static IP as used for Prometheus. - externalIPs: - # Use the same IP as above, since we're on a different port. - - 35.184.81.106 - type: ClusterIP diff --git a/k8s/mlab-oti/prometheus-federation/services/blackbox-public-service.yml b/k8s/mlab-oti/prometheus-federation/services/blackbox-public-service.yml deleted file mode 100644 index cf2b0432..00000000 --- a/k8s/mlab-oti/prometheus-federation/services/blackbox-public-service.yml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - annotations: - # The grafana web server does not export any prometheus metrics. - prometheus.io/scrape: 'false' - name: blackbox-public-service - namespace: default -spec: - ports: - - port: 9115 - protocol: TCP - targetPort: 9115 - selector: - # Pods with labels matching this key/value pair will be publically - # accessible through the service IP and port. - run: blackbox-server - sessionAffinity: None - # Use the same static IP as used for Prometheus. - externalIPs: - # Use the same IP as above, since we're on a different port. 
- - 35.184.81.106 - type: ClusterIP diff --git a/k8s/mlab-oti/prometheus-federation/services/prometheus-public-service.yml b/k8s/mlab-oti/prometheus-federation/services/prometheus-public-service.yml deleted file mode 100644 index b506212f..00000000 --- a/k8s/mlab-oti/prometheus-federation/services/prometheus-public-service.yml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: prometheus-public-service - namespace: default -spec: - ports: - - port: 9090 - protocol: TCP - targetPort: 9090 - selector: - # Pods with labels matching this key/value pair will be publically - # accessible through the service IP and port. - run: prometheus-server - sessionAffinity: None - # Allocate a static IP manually in GCP console: Networking -> Load Balancing. - externalIPs: - - 35.184.81.106 - type: ClusterIP diff --git a/k8s/mlab-oti/prometheus-federation/services/pushgateway-public-service.yml b/k8s/mlab-oti/prometheus-federation/services/pushgateway-public-service.yml deleted file mode 100644 index d3a85c27..00000000 --- a/k8s/mlab-oti/prometheus-federation/services/pushgateway-public-service.yml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: pushgateway-public-service - namespace: default -spec: - ports: - - port: 9091 - protocol: TCP - targetPort: 9091 - selector: - # Pods with labels matching this key/value pair will be publically - # accessible through the service IP and port. - run: pushgateway-server - sessionAffinity: None - # Use the same static IP as used for Prometheus. - externalIPs: - # Use the same IP as above, since we're on a different port. 
- - 35.184.81.106 - type: ClusterIP diff --git a/k8s/mlab-sandbox/prometheus-federation/deployments/blackbox.yml b/k8s/mlab-sandbox/prometheus-federation/deployments/blackbox.yml deleted file mode 100644 index 80496174..00000000 --- a/k8s/mlab-sandbox/prometheus-federation/deployments/blackbox.yml +++ /dev/null @@ -1,60 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: blackbox-server - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - # Used to match pre-existing pods that may be affected during updates. - run: blackbox-server - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - # Pod template. - template: - metadata: - labels: - # Note: run=blackbox-server should match a service config with a - # public IP and port so that it is publically accessible. - run: blackbox-server - annotations: - # Tell prometheus service discovery to scrape the blackbox container. - prometheus.io/scrape: 'true' - spec: - # Place the pod into the Guaranteed QoS by setting equal resource - # requests and limits for *all* containers in the pod. - # For more background, see: - # https://github.com/kubernetes/community/blob/master/contributors/design-proposals/resource-qos.md - containers: - # Check https://hub.docker.com/r/prom/blackbox-exporter/tags/ for the current - # stable version. - - image: prom/blackbox-exporter:v0.4.0 - # Note: the container name appears to be ignored and the actual pod name - # is derived from the Deployment.metadata.name. However, removing this - # value results in a configuration error. - name: blackbox-server - args: ["-config.file=/etc/blackbox/config.yml"] - ports: - - containerPort: 9115 - resources: - requests: - memory: "100Mi" - cpu: "100m" - limits: - memory: "100Mi" - cpu: "100m" - volumeMounts: - # /etc/blackbox/config.yml contains the M-Lab Prometheus config. 
- - mountPath: /etc/blackbox - name: blackbox-config - - # Disks created manually, can be named here explicitly using - # gcePersistentDisk instead of the persistentVolumeClaim. - volumes: - - name: blackbox-config - configMap: - name: blackbox-config diff --git a/k8s/mlab-sandbox/prometheus-federation/deployments/grafana.yml b/k8s/mlab-sandbox/prometheus-federation/deployments/grafana.yml deleted file mode 100644 index 56b42811..00000000 --- a/k8s/mlab-sandbox/prometheus-federation/deployments/grafana.yml +++ /dev/null @@ -1,76 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: grafana-server - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - # Used to match pre-existing pods that may be affected during updates. - run: grafana-server - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - # Pod template. - template: - metadata: - labels: - # Note: run=grafana-server should match a service config with a - # public IP and port so that it is publically accessible. - run: grafana-server - spec: - containers: - # Check https://hub.docker.com/r/grafana/grafana/tags/ for the current - # stable version. - - image: grafana/grafana:4.5.2 - name: grafana-server - env: - - name: GF_SECURITY_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: grafana-secrets - key: admin-password - - name: GF_SERVER_DOMAIN - # The public facing domain name used to access grafana from a browser. - # The domain value is used in alert URLs generated by Grafana. - valueFrom: - configMapKeyRef: - name: grafana-env - key: domain - - name: GF_AUTH_GOOGLE_CLIENT_SECRET - valueFrom: - configMapKeyRef: - name: grafana-env - key: gf_auth_google_client_secret - - name: GF_AUTH_GOOGLE_CLIENT_ID - valueFrom: - configMapKeyRef: - name: grafana-env - key: gf_auth_google_client_id - - ports: - - containerPort: 3000 - resources: - requests: - memory: "1Gi" - cpu: "200m" - # TODO: add support for SSL certificates, so logins are secure. 
- volumeMounts: - # Default configs place an sqlite3 database in /var/lib/grafana - - mountPath: /var/lib/grafana - name: grafana-storage - # /etc/grafana/* should contain the M-Lab Grafana configs. - - mountPath: /etc/grafana - name: grafana-config - # Disks created manually, can be named here explicitly using - # gcePersistentDisk instead of the persistentVolumeClaim. - volumes: - - name: grafana-storage - persistentVolumeClaim: - claimName: auto-grafana-disk0 - - name: grafana-config - configMap: - name: grafana-config diff --git a/k8s/mlab-sandbox/prometheus-federation/deployments/kube-state-metrics.yml b/k8s/mlab-sandbox/prometheus-federation/deployments/kube-state-metrics.yml deleted file mode 100644 index 2d463871..00000000 --- a/k8s/mlab-sandbox/prometheus-federation/deployments/kube-state-metrics.yml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: kube-state-metrics -spec: - replicas: 1 - template: - metadata: - labels: - application: kube-state-metrics - version: "v0.5.0" - annotations: - prometheus.io/scrape: 'true' - spec: - nodeSelector: - prometheus-node: 'true' - containers: - - name: kube-state-metrics - image: gcr.io/google_containers/kube-state-metrics:v0.5.0 - ports: - - containerPort: 8080 diff --git a/k8s/mlab-sandbox/prometheus-federation/deployments/pushgateway.yml b/k8s/mlab-sandbox/prometheus-federation/deployments/pushgateway.yml deleted file mode 100644 index 41c2808d..00000000 --- a/k8s/mlab-sandbox/prometheus-federation/deployments/pushgateway.yml +++ /dev/null @@ -1,51 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: pushgateway-server - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - # Used to match pre-existing pods that may be affected during updates. - run: pushgateway-server - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - # Pod template. 
- template: - metadata: - labels: - # Note: run=pushgateway-server should match a service config with a - # public IP and port so that it is publically accessible. - run: pushgateway-server - spec: - # Place the pod into the Guaranteed QoS by setting equal resource - # requests and limits for *all* containers in the pod. - # For more background, see: - # https://github.com/kubernetes/community/blob/master/contributors/design-proposals/resource-qos.md - containers: - # Check https://hub.docker.com/r/prom/pushgateway/tags/ for the current - # stable version. - - image: prom/pushgateway:v0.3.1 - name: pushgateway-server - - # NOTE: push gateway metrics do not expire. Once pushed to the gateway - # they remain indefinitely until the gateway restarts or they are - # overwritten. For a fixed label set this is okay. However, if the set - # of instance names grows over time without reuse, then the gateway - # will persist that information indefinitely, but it won't be helpful. - # So, do not use file persistence until we're sure it's what we need. 
- # args: ["-persistence.file=/pushgateway/metrics.dat", - # "-persistence.interval=1m"] - ports: - - containerPort: 9091 - resources: - requests: - memory: "400Mi" - cpu: "200m" - limits: - memory: "400Mi" - cpu: "200m" diff --git a/k8s/mlab-sandbox/prometheus-federation/persistentvolumes/storage-class.yml b/k8s/mlab-sandbox/prometheus-federation/persistentvolumes/storage-class.yml deleted file mode 100644 index a07f32de..00000000 --- a/k8s/mlab-sandbox/prometheus-federation/persistentvolumes/storage-class.yml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: storage.k8s.io/v1beta1 -kind: StorageClass -metadata: - name: slow -provisioner: kubernetes.io/gce-pd -parameters: - type: pd-standard ---- -apiVersion: storage.k8s.io/v1beta1 -kind: StorageClass -metadata: - name: fast -provisioner: kubernetes.io/gce-pd -parameters: - type: pd-ssd diff --git a/k8s/mlab-sandbox/prometheus-federation/roles/rbac-prometheus.yml b/k8s/mlab-sandbox/prometheus-federation/roles/rbac-prometheus.yml deleted file mode 100644 index f86ebd68..00000000 --- a/k8s/mlab-sandbox/prometheus-federation/roles/rbac-prometheus.yml +++ /dev/null @@ -1,38 +0,0 @@ -# Add a cluster role for access to the v1.6 node/metrics resource. -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRole -metadata: - name: prometheus -rules: -- apiGroups: [""] - resources: - - nodes - - nodes/proxy - - services - - endpoints - - pods - verbs: ["get", "list", "watch"] -- nonResourceURLs: ["/metrics"] - verbs: ["get"] ---- -# Define a service account under which the prometheus server runs. -apiVersion: v1 -kind: ServiceAccount -metadata: - name: prometheus - namespace: default ---- -# Bind the cluster role above to the service account, granting this account -# permission to the resources defined by the role. 
-apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRoleBinding -metadata: - name: prometheus -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: prometheus -subjects: -- kind: ServiceAccount - name: prometheus - namespace: default diff --git a/k8s/mlab-sandbox/prometheus-federation/services/blackbox-public-service.yml b/k8s/mlab-sandbox/prometheus-federation/services/blackbox-public-service.yml deleted file mode 100644 index 640494e1..00000000 --- a/k8s/mlab-sandbox/prometheus-federation/services/blackbox-public-service.yml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - annotations: - # The grafana web server does not export any prometheus metrics. - prometheus.io/scrape: 'false' - name: blackbox-public-service - namespace: default -spec: - ports: - - port: 9115 - protocol: TCP - targetPort: 9115 - selector: - # Pods with labels matching this key/value pair will be publically - # accessible through the service IP and port. - run: blackbox-server - sessionAffinity: None - # Use the same static IP as used for Prometheus. - externalIPs: - # Use the same IP as above, since we're on a different port. - - 35.184.166.181 - type: ClusterIP diff --git a/k8s/mlab-sandbox/prometheus-federation/services/grafana-public-service.yml b/k8s/mlab-sandbox/prometheus-federation/services/grafana-public-service.yml deleted file mode 100644 index af174f6d..00000000 --- a/k8s/mlab-sandbox/prometheus-federation/services/grafana-public-service.yml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - annotations: - # The grafana web server does not export any prometheus metrics. - prometheus.io/scrape: 'false' - name: grafana-public-service - namespace: default -spec: - ports: - - port: 3000 - protocol: TCP - targetPort: 3000 - selector: - # Pods with labels matching this key/value pair will be publically - # accessible through the service IP and port. 
- run: grafana-server - sessionAffinity: None - # Use the same static IP as used for Prometheus. - externalIPs: - # Use the same IP as above, since we're on a different port. - - 35.184.166.181 - type: ClusterIP diff --git a/k8s/mlab-sandbox/prometheus-federation/services/pushgateway-public-service.yml b/k8s/mlab-sandbox/prometheus-federation/services/pushgateway-public-service.yml deleted file mode 100644 index 886d2087..00000000 --- a/k8s/mlab-sandbox/prometheus-federation/services/pushgateway-public-service.yml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: pushgateway-public-service - namespace: default -spec: - ports: - - port: 9091 - protocol: TCP - targetPort: 9091 - selector: - # Pods with labels matching this key/value pair will be publically - # accessible through the service IP and port. - run: pushgateway-server - sessionAffinity: None - # Use the same static IP as used for Prometheus. - externalIPs: - # Use the same IP as above, since we're on a different port. - - 35.184.166.181 - type: ClusterIP diff --git a/k8s/mlab-staging/prometheus-federation/deployments/alertmanager.yml b/k8s/mlab-staging/prometheus-federation/deployments/alertmanager.yml deleted file mode 100644 index c66b57c5..00000000 --- a/k8s/mlab-staging/prometheus-federation/deployments/alertmanager.yml +++ /dev/null @@ -1,71 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: alertmanager-server - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - # Used to match pre-existing pods that may be affected during updates. - run: alertmanager-server - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - # Pod template. - template: - metadata: - labels: - # Note: run=alertmanager-server should match a service config with a - # public IP and port so that it is publically accessible. 
- run: alertmanager-server - spec: - # Place the pod into the Guaranteed QoS by setting equal resource - # requests and limits for *all* containers in the pod. - # For more background, see: - # https://github.com/kubernetes/community/blob/master/contributors/design-proposals/resource-qos.md - containers: - # Check https://hub.docker.com/r/prom/alertmanager/tags/ for the current - # stable version. - - image: prom/alertmanager:v0.7.1 - name: alertmanager-server - env: - - name: ALERTMANAGER_EXTERNAL_URL - # A public domain name used to access alertmanager from a browser. - # The external url is used in alert URLs generated by alertmanager. - valueFrom: - configMapKeyRef: - name: alertmanager-env - key: external-url - args: ["-config.file=/etc/alertmanager/config.yml", - "-web.external-url=$(ALERTMANAGER_EXTERNAL_URL)", - "-storage.path=/alertmanager"] - ports: - - containerPort: 9093 - resources: - requests: - memory: "400Mi" - cpu: "200m" - limits: - memory: "400Mi" - cpu: "200m" - volumeMounts: - # /alertmanager stores alert state, like acks, silences, etc. - - mountPath: /alertmanager - name: alertmanager-storage - subPath: alertmanager-data - # /etc/alertmanager/config.yml contains the M-Lab alertmanager config. - - mountPath: /etc/alertmanager - name: alertmanager-config - - # Disks created manually, can be named here explicitly using - # gcePersistentDisk instead of the persistentVolumeClaim. 
- volumes: - - name: alertmanager-storage - persistentVolumeClaim: - claimName: auto-alertmanager-disk0 - - name: alertmanager-config - configMap: - name: alertmanager-config diff --git a/k8s/mlab-staging/prometheus-federation/deployments/blackbox.yml b/k8s/mlab-staging/prometheus-federation/deployments/blackbox.yml deleted file mode 100644 index 80496174..00000000 --- a/k8s/mlab-staging/prometheus-federation/deployments/blackbox.yml +++ /dev/null @@ -1,60 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: blackbox-server - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - # Used to match pre-existing pods that may be affected during updates. - run: blackbox-server - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - # Pod template. - template: - metadata: - labels: - # Note: run=blackbox-server should match a service config with a - # public IP and port so that it is publically accessible. - run: blackbox-server - annotations: - # Tell prometheus service discovery to scrape the blackbox container. - prometheus.io/scrape: 'true' - spec: - # Place the pod into the Guaranteed QoS by setting equal resource - # requests and limits for *all* containers in the pod. - # For more background, see: - # https://github.com/kubernetes/community/blob/master/contributors/design-proposals/resource-qos.md - containers: - # Check https://hub.docker.com/r/prom/blackbox-exporter/tags/ for the current - # stable version. - - image: prom/blackbox-exporter:v0.4.0 - # Note: the container name appears to be ignored and the actual pod name - # is derived from the Deployment.metadata.name. However, removing this - # value results in a configuration error. 
- name: blackbox-server - args: ["-config.file=/etc/blackbox/config.yml"] - ports: - - containerPort: 9115 - resources: - requests: - memory: "100Mi" - cpu: "100m" - limits: - memory: "100Mi" - cpu: "100m" - volumeMounts: - # /etc/blackbox/config.yml contains the M-Lab Prometheus config. - - mountPath: /etc/blackbox - name: blackbox-config - - # Disks created manually, can be named here explicitly using - # gcePersistentDisk instead of the persistentVolumeClaim. - volumes: - - name: blackbox-config - configMap: - name: blackbox-config diff --git a/k8s/mlab-staging/prometheus-federation/deployments/grafana.yml b/k8s/mlab-staging/prometheus-federation/deployments/grafana.yml deleted file mode 100644 index 56b42811..00000000 --- a/k8s/mlab-staging/prometheus-federation/deployments/grafana.yml +++ /dev/null @@ -1,76 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: grafana-server - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - # Used to match pre-existing pods that may be affected during updates. - run: grafana-server - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - # Pod template. - template: - metadata: - labels: - # Note: run=grafana-server should match a service config with a - # public IP and port so that it is publically accessible. - run: grafana-server - spec: - containers: - # Check https://hub.docker.com/r/grafana/grafana/tags/ for the current - # stable version. - - image: grafana/grafana:4.5.2 - name: grafana-server - env: - - name: GF_SECURITY_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: grafana-secrets - key: admin-password - - name: GF_SERVER_DOMAIN - # The public facing domain name used to access grafana from a browser. - # The domain value is used in alert URLs generated by Grafana. 
- valueFrom: - configMapKeyRef: - name: grafana-env - key: domain - - name: GF_AUTH_GOOGLE_CLIENT_SECRET - valueFrom: - configMapKeyRef: - name: grafana-env - key: gf_auth_google_client_secret - - name: GF_AUTH_GOOGLE_CLIENT_ID - valueFrom: - configMapKeyRef: - name: grafana-env - key: gf_auth_google_client_id - - ports: - - containerPort: 3000 - resources: - requests: - memory: "1Gi" - cpu: "200m" - # TODO: add support for SSL certificates, so logins are secure. - volumeMounts: - # Default configs place an sqlite3 database in /var/lib/grafana - - mountPath: /var/lib/grafana - name: grafana-storage - # /etc/grafana/* should contain the M-Lab Grafana configs. - - mountPath: /etc/grafana - name: grafana-config - # Disks created manually, can be named here explicitly using - # gcePersistentDisk instead of the persistentVolumeClaim. - volumes: - - name: grafana-storage - persistentVolumeClaim: - claimName: auto-grafana-disk0 - - name: grafana-config - configMap: - name: grafana-config diff --git a/k8s/mlab-staging/prometheus-federation/deployments/kube-state-metrics.yml b/k8s/mlab-staging/prometheus-federation/deployments/kube-state-metrics.yml deleted file mode 100644 index 2d463871..00000000 --- a/k8s/mlab-staging/prometheus-federation/deployments/kube-state-metrics.yml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: kube-state-metrics -spec: - replicas: 1 - template: - metadata: - labels: - application: kube-state-metrics - version: "v0.5.0" - annotations: - prometheus.io/scrape: 'true' - spec: - nodeSelector: - prometheus-node: 'true' - containers: - - name: kube-state-metrics - image: gcr.io/google_containers/kube-state-metrics:v0.5.0 - ports: - - containerPort: 8080 diff --git a/k8s/mlab-staging/prometheus-federation/deployments/prometheus.yml b/k8s/mlab-staging/prometheus-federation/deployments/prometheus.yml deleted file mode 100644 index 91aa0606..00000000 --- 
a/k8s/mlab-staging/prometheus-federation/deployments/prometheus.yml +++ /dev/null @@ -1,161 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: prometheus-server - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - # Used to match pre-existing pods that may be affected during updates. - run: prometheus-server - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - # Pod template. - template: - metadata: - labels: - # Note: run=prometheus-server should match a service config with a - # public IP and port so that it is publically accessible. - run: prometheus-server - annotations: - # Tell prometheus service discovery to scrape the pod containers. - prometheus.io/scrape: 'true' - spec: - # References a service account with RBAC permissions for accessing node - # metrics. This is required for k8s version 1.6+. - serviceAccountName: prometheus - - # Clusters running a prometheus instance must label nodes exclusively for - # use by prometheus. See README for steps to create a GKE cluster for - # prometheus. - nodeSelector: - prometheus-node: 'true' - - # When prometheus receives SIGTERM, it begins a new checkpoint. This can - # take longer than the default grace period of 30s. - terminationGracePeriodSeconds: 240 - - # Place the pod into the Guaranteed QoS by setting equal resource - # requests and limits for *all* containers in the pod. - # For more background, see: - # https://github.com/kubernetes/community/blob/master/contributors/design-proposals/resource-qos.md - containers: - # Check https://hub.docker.com/r/prom/prometheus/tags/ for the current - # stable version. - - image: prom/prometheus:v1.6.2 - # Note: the container name appears to be ignored and the actual pod name - # is derived from the Deployment.metadata.name. However, removing this - # value results in a configuration error. - name: prometheus - # Note: Set retention time to 120 days. (default retention is 30d). 
- args: ["-config.file=/etc/prometheus/prometheus.yml", - "-storage.local.path=/prometheus", - "-storage.local.retention=2880h", - "-alertmanager.url=http://alertmanager-public-service.default.svc.cluster.local:9093", - "-web.external-url=http://status-mlab-staging.measurementlab.net:9090", - "-web.console.libraries=/usr/share/prometheus/console_libraries", - "-web.console.templates=/usr/share/prometheus/consoles"] - ports: - - containerPort: 9090 - resources: - requests: - memory: "12Gi" - cpu: "6000m" - limits: - memory: "12Gi" - cpu: "6000m" - volumeMounts: - # /prometheus stores all metric data. Declared as VOLUME in base image. - - mountPath: /prometheus - name: prometheus-storage - subPath: prometheus-data - # /legacy-targets should contain legacy target configuration files. - - mountPath: /legacy-targets - name: prometheus-storage - subPath: legacy-targets - # /federation-targets should contain federation target config files. - - mountPath: /federation-targets - name: prometheus-storage - subPath: federation-targets - # /blackbox-targets should contain blackbox target config files. - - mountPath: /blackbox-targets - name: prometheus-storage - subPath: blackbox-targets - # /aeflex-targets should contain AppEngine target config files. - - mountPath: /aeflex-targets - name: prometheus-storage - subPath: aeflex-targets - # /snmp-targets should contain snmp_exporter target config files. - - mountPath: /snmp-targets - name: prometheus-storage - subPath: snmp-targets - # /etc/prometheus/prometheus.yml contains the M-Lab Prometheus config. - - mountPath: /etc/prometheus - name: prometheus-config - - # Run a node-exporter as part of the prometheus-server pod so that it has - # access to the same namespace and volumes as the prometheus-server. This - # allows simple disk usage monitoring of the "/prometheus" mount point. - - image: prom/node-exporter:v0.13.0 - name: node-exporter - # Note: only enable the filesystem collector, and ignore system paths. 
- args: [ "--collectors.enabled=filesystem", - "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($|/)"] - ports: - - containerPort: 9100 - resources: - requests: - memory: "10Mi" - cpu: "50m" - limits: - memory: "10Mi" - cpu: "50m" - volumeMounts: - - mountPath: /prometheus - name: prometheus-storage - - - image: measurementlab/gcp-service-discovery - name: service-discovery - env: - - name: GCLOUD_PROJECT - valueFrom: - configMapKeyRef: - name: prometheus-federation-config - key: gcloud-project - args: [ "--aef-target=/targets/aeflex-targets/aeflex.json", - "--gke-target=/targets/federation-targets/prometheus-clusters.json", - "--http-target=/targets/legacy-targets/sidestream.json", - "--http-target=/targets/blackbox-targets/ssh806.json", - "--http-target=/targets/blackbox-targets/rsyncd.json", - "--http-target=/targets/snmp-targets/snmpexporter.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/legacy-targets/sidestream.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/blackbox-targets/ssh806.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/blackbox-targets/rsyncd.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/snmp-targets/snmpexporter.json", - "--project=$(GCLOUD_PROJECT)"] - resources: - requests: - memory: "150Mi" - cpu: "50m" - limits: - memory: "150Mi" - cpu: "50m" - volumeMounts: - # Mount the the prometheus-storage for write access to the target - # directories. - - mountPath: /targets - name: prometheus-storage - - # Disks created manually, can be named here explicitly using - # gcePersistentDisk instead of the persistentVolumeClaim. 
- volumes: - - name: prometheus-storage - persistentVolumeClaim: - claimName: auto-prometheus-disk0 - - name: prometheus-config - configMap: - name: prometheus-federation-config diff --git a/k8s/mlab-staging/prometheus-federation/deployments/pushgateway.yml b/k8s/mlab-staging/prometheus-federation/deployments/pushgateway.yml deleted file mode 100644 index 41c2808d..00000000 --- a/k8s/mlab-staging/prometheus-federation/deployments/pushgateway.yml +++ /dev/null @@ -1,51 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: pushgateway-server - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - # Used to match pre-existing pods that may be affected during updates. - run: pushgateway-server - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - # Pod template. - template: - metadata: - labels: - # Note: run=pushgateway-server should match a service config with a - # public IP and port so that it is publically accessible. - run: pushgateway-server - spec: - # Place the pod into the Guaranteed QoS by setting equal resource - # requests and limits for *all* containers in the pod. - # For more background, see: - # https://github.com/kubernetes/community/blob/master/contributors/design-proposals/resource-qos.md - containers: - # Check https://hub.docker.com/r/prom/pushgateway/tags/ for the current - # stable version. - - image: prom/pushgateway:v0.3.1 - name: pushgateway-server - - # NOTE: push gateway metrics do not expire. Once pushed to the gateway - # they remain indefinitely until the gateway restarts or they are - # overwritten. For a fixed label set this is okay. However, if the set - # of instance names grows over time without reuse, then the gateway - # will persist that information indefinitely, but it won't be helpful. - # So, do not use file persistence until we're sure it's what we need. 
- # args: ["-persistence.file=/pushgateway/metrics.dat", - # "-persistence.interval=1m"] - ports: - - containerPort: 9091 - resources: - requests: - memory: "400Mi" - cpu: "200m" - limits: - memory: "400Mi" - cpu: "200m" diff --git a/k8s/mlab-staging/prometheus-federation/persistentvolumes/persistent-volumes.yml b/k8s/mlab-staging/prometheus-federation/persistentvolumes/persistent-volumes.yml deleted file mode 100644 index ab4f6b7f..00000000 --- a/k8s/mlab-staging/prometheus-federation/persistentvolumes/persistent-volumes.yml +++ /dev/null @@ -1,38 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: auto-prometheus-disk0 - annotations: - volume.beta.kubernetes.io/storage-class: "slow" -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 400Gi ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: auto-grafana-disk0 - annotations: - volume.beta.kubernetes.io/storage-class: "slow" -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 10Gi ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: auto-alertmanager-disk0 - annotations: - volume.beta.kubernetes.io/storage-class: "slow" -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 10Gi diff --git a/k8s/mlab-staging/prometheus-federation/persistentvolumes/storage-class.yml b/k8s/mlab-staging/prometheus-federation/persistentvolumes/storage-class.yml deleted file mode 100644 index a07f32de..00000000 --- a/k8s/mlab-staging/prometheus-federation/persistentvolumes/storage-class.yml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: storage.k8s.io/v1beta1 -kind: StorageClass -metadata: - name: slow -provisioner: kubernetes.io/gce-pd -parameters: - type: pd-standard ---- -apiVersion: storage.k8s.io/v1beta1 -kind: StorageClass -metadata: - name: fast -provisioner: kubernetes.io/gce-pd -parameters: - type: pd-ssd diff --git a/k8s/mlab-staging/prometheus-federation/roles/rbac-prometheus.yml 
b/k8s/mlab-staging/prometheus-federation/roles/rbac-prometheus.yml deleted file mode 100644 index f86ebd68..00000000 --- a/k8s/mlab-staging/prometheus-federation/roles/rbac-prometheus.yml +++ /dev/null @@ -1,38 +0,0 @@ -# Add a cluster role for access to the v1.6 node/metrics resource. -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRole -metadata: - name: prometheus -rules: -- apiGroups: [""] - resources: - - nodes - - nodes/proxy - - services - - endpoints - - pods - verbs: ["get", "list", "watch"] -- nonResourceURLs: ["/metrics"] - verbs: ["get"] ---- -# Define a service account under which the prometheus server runs. -apiVersion: v1 -kind: ServiceAccount -metadata: - name: prometheus - namespace: default ---- -# Bind the cluster role above to the service account, granting this account -# permission to the resources defined by the role. -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRoleBinding -metadata: - name: prometheus -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: prometheus -subjects: -- kind: ServiceAccount - name: prometheus - namespace: default diff --git a/k8s/mlab-staging/prometheus-federation/services/grafana-public-service.yml b/k8s/mlab-staging/prometheus-federation/services/grafana-public-service.yml deleted file mode 100644 index f0804051..00000000 --- a/k8s/mlab-staging/prometheus-federation/services/grafana-public-service.yml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - annotations: - # The grafana web server does not export any prometheus metrics. - prometheus.io/scrape: 'false' - name: grafana-public-service - namespace: default -spec: - ports: - - port: 3000 - protocol: TCP - targetPort: 3000 - selector: - # Pods with labels matching this key/value pair will be publically - # accessible through the service IP and port. - run: grafana-server - sessionAffinity: None - # Use the same static IP as used for Prometheus. 
- externalIPs: - # Use the same IP as above, since we're on a different port. - - 35.185.76.159 - type: ClusterIP diff --git a/k8s/mlab-staging/prometheus-federation/services/prometheus-public-service.yml b/k8s/mlab-staging/prometheus-federation/services/prometheus-public-service.yml deleted file mode 100644 index 8f849705..00000000 --- a/k8s/mlab-staging/prometheus-federation/services/prometheus-public-service.yml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: prometheus-public-service - namespace: default -spec: - ports: - - port: 9090 - protocol: TCP - targetPort: 9090 - selector: - # Pods with labels matching this key/value pair will be publically - # accessible through the service IP and port. - run: prometheus-server - sessionAffinity: None - # Allocate a static IP manually in GCP console: Networking -> Load Balancing. - externalIPs: - - 35.185.76.159 - type: ClusterIP diff --git a/k8s/mlab-oti/prometheus-federation/deployments/alertmanager.yml b/k8s/prometheus-federation/deployments/alertmanager.yml similarity index 100% rename from k8s/mlab-oti/prometheus-federation/deployments/alertmanager.yml rename to k8s/prometheus-federation/deployments/alertmanager.yml diff --git a/k8s/mlab-oti/prometheus-federation/deployments/blackbox.yml b/k8s/prometheus-federation/deployments/blackbox.yml similarity index 100% rename from k8s/mlab-oti/prometheus-federation/deployments/blackbox.yml rename to k8s/prometheus-federation/deployments/blackbox.yml diff --git a/k8s/mlab-oti/prometheus-federation/deployments/github-receiver.yml b/k8s/prometheus-federation/deployments/github-receiver.yml similarity index 100% rename from k8s/mlab-oti/prometheus-federation/deployments/github-receiver.yml rename to k8s/prometheus-federation/deployments/github-receiver.yml diff --git a/k8s/mlab-oti/prometheus-federation/deployments/grafana.yml b/k8s/prometheus-federation/deployments/grafana.yml similarity index 100% rename from 
k8s/mlab-oti/prometheus-federation/deployments/grafana.yml rename to k8s/prometheus-federation/deployments/grafana.yml diff --git a/k8s/mlab-oti/prometheus-federation/deployments/kube-state-metrics.yml b/k8s/prometheus-federation/deployments/kube-state-metrics.yml similarity index 100% rename from k8s/mlab-oti/prometheus-federation/deployments/kube-state-metrics.yml rename to k8s/prometheus-federation/deployments/kube-state-metrics.yml diff --git a/k8s/mlab-sandbox/prometheus-federation/deployments/prometheus.yml b/k8s/prometheus-federation/deployments/prometheus.yml similarity index 89% rename from k8s/mlab-sandbox/prometheus-federation/deployments/prometheus.yml rename to k8s/prometheus-federation/deployments/prometheus.yml index 596b21de..e651f8fd 100644 --- a/k8s/mlab-sandbox/prometheus-federation/deployments/prometheus.yml +++ b/k8s/prometheus-federation/deployments/prometheus.yml @@ -56,18 +56,18 @@ spec: "-storage.local.path=/prometheus", "-storage.local.retention=2880h", "-alertmanager.url=http://alertmanager-public-service.default.svc.cluster.local:9093", - "-web.external-url=http://status-mlab-sandbox.measurementlab.net:9090", + "-web.external-url=http://status-{{GCLOUD_PROJECT}}.measurementlab.net:9090", "-web.console.libraries=/usr/share/prometheus/console_libraries", "-web.console.templates=/usr/share/prometheus/consoles"] ports: - containerPort: 9090 resources: requests: - memory: "12Gi" - cpu: "3000m" + memory: "{{PROMETHEUS_RAM}}" + cpu: "{{PROMETHEUS_CPU}}" limits: - memory: "12Gi" - cpu: "3000m" + memory: "{{PROMETHEUS_RAM}}" + cpu: "{{PROMETHEUS_CPU}}" volumeMounts: # /prometheus stores all metric data. Declared as VOLUME in base image. 
- mountPath: /prometheus @@ -120,12 +120,6 @@ spec: - image: measurementlab/gcp-service-discovery name: service-discovery - env: - - name: GCLOUD_PROJECT - valueFrom: - configMapKeyRef: - name: prometheus-federation-config - key: gcloud-project args: [ "--aef-target=/targets/aeflex-targets/aeflex.json", "--gke-target=/targets/federation-targets/prometheus-clusters.json", "--http-target=/targets/legacy-targets/sidestream.json", @@ -133,12 +127,12 @@ spec: "--http-target=/targets/blackbox-targets/ssh806.json", "--http-target=/targets/blackbox-targets/rsyncd.json", "--http-target=/targets/snmp-targets/snmpexporter.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/legacy-targets/sidestream.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/legacy-targets/lameduck.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/blackbox-targets/ssh806.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/blackbox-targets/rsyncd.json", - "--http-source=https://storage.googleapis.com/operator-$(GCLOUD_PROJECT)/prometheus/snmp-targets/snmpexporter.json", - "--project=$(GCLOUD_PROJECT)"] + "--http-source=https://storage.googleapis.com/operator-{{GCLOUD_PROJECT}}/prometheus/legacy-targets/sidestream.json", + "--http-source=https://storage.googleapis.com/operator-{{GCLOUD_PROJECT}}/prometheus/legacy-targets/lameduck.json", + "--http-source=https://storage.googleapis.com/operator-{{GCLOUD_PROJECT}}/prometheus/blackbox-targets/ssh806.json", + "--http-source=https://storage.googleapis.com/operator-{{GCLOUD_PROJECT}}/prometheus/blackbox-targets/rsyncd.json", + "--http-source=https://storage.googleapis.com/operator-{{GCLOUD_PROJECT}}/prometheus/snmp-targets/snmpexporter.json", + "--project={{GCLOUD_PROJECT}}"] resources: requests: memory: "150Mi" diff --git a/k8s/mlab-oti/prometheus-federation/deployments/pushgateway.yml 
b/k8s/prometheus-federation/deployments/pushgateway.yml similarity index 100% rename from k8s/mlab-oti/prometheus-federation/deployments/pushgateway.yml rename to k8s/prometheus-federation/deployments/pushgateway.yml diff --git a/k8s/prometheus-federation/mlab-oti.yml b/k8s/prometheus-federation/mlab-oti.yml new file mode 100644 index 00000000..c8db13df --- /dev/null +++ b/k8s/prometheus-federation/mlab-oti.yml @@ -0,0 +1,7 @@ +# Configuration values for mlab-oti + +GCLOUD_PROJECT: mlab-oti +PROMETHEUS_RAM: 12Gi +PROMETHEUS_CPU: 6000m +PROMETHEUS_VOLUME_SIZE: 400Gi +EXTERNAL_IP: 35.184.81.106 diff --git a/k8s/prometheus-federation/mlab-sandbox.yml b/k8s/prometheus-federation/mlab-sandbox.yml new file mode 100644 index 00000000..3a70b603 --- /dev/null +++ b/k8s/prometheus-federation/mlab-sandbox.yml @@ -0,0 +1,7 @@ +# Configuration values for mlab-sandbox. + +GCLOUD_PROJECT: mlab-sandbox +PROMETHEUS_RAM: 12Gi +PROMETHEUS_CPU: 3000m +PROMETHEUS_VOLUME_SIZE: 200Gi +EXTERNAL_IP: 35.184.166.181 diff --git a/k8s/prometheus-federation/mlab-staging.yml b/k8s/prometheus-federation/mlab-staging.yml new file mode 100644 index 00000000..21a80511 --- /dev/null +++ b/k8s/prometheus-federation/mlab-staging.yml @@ -0,0 +1,7 @@ +# Configuration values for mlab-staging + +GCLOUD_PROJECT: mlab-staging +PROMETHEUS_RAM: 12Gi +PROMETHEUS_CPU: 6000m +PROMETHEUS_VOLUME_SIZE: 400Gi +EXTERNAL_IP: 35.185.76.159 diff --git a/k8s/mlab-sandbox/prometheus-federation/persistentvolumes/persistent-volumes.yml b/k8s/prometheus-federation/persistentvolumes/persistent-volumes.yml similarity index 94% rename from k8s/mlab-sandbox/prometheus-federation/persistentvolumes/persistent-volumes.yml rename to k8s/prometheus-federation/persistentvolumes/persistent-volumes.yml index 3aa7c5ce..165dc2c7 100644 --- a/k8s/mlab-sandbox/prometheus-federation/persistentvolumes/persistent-volumes.yml +++ b/k8s/prometheus-federation/persistentvolumes/persistent-volumes.yml @@ -9,7 +9,7 @@ spec: - ReadWriteOnce resources: 
requests: - storage: 200Gi + storage: {{PROMETHEUS_VOLUME_SIZE}} --- apiVersion: v1 kind: PersistentVolumeClaim diff --git a/k8s/mlab-oti/prometheus-federation/persistentvolumes/storage-class.yml b/k8s/prometheus-federation/persistentvolumes/storage-class.yml similarity index 100% rename from k8s/mlab-oti/prometheus-federation/persistentvolumes/storage-class.yml rename to k8s/prometheus-federation/persistentvolumes/storage-class.yml diff --git a/k8s/mlab-oti/prometheus-federation/roles/rbac-prometheus.yml b/k8s/prometheus-federation/roles/rbac-prometheus.yml similarity index 100% rename from k8s/mlab-oti/prometheus-federation/roles/rbac-prometheus.yml rename to k8s/prometheus-federation/roles/rbac-prometheus.yml diff --git a/k8s/mlab-staging/prometheus-federation/services/alertmanager-public-service.yml b/k8s/prometheus-federation/services/alertmanager-public-service.yml similarity index 96% rename from k8s/mlab-staging/prometheus-federation/services/alertmanager-public-service.yml rename to k8s/prometheus-federation/services/alertmanager-public-service.yml index 6be6ee42..221d3aae 100644 --- a/k8s/mlab-staging/prometheus-federation/services/alertmanager-public-service.yml +++ b/k8s/prometheus-federation/services/alertmanager-public-service.yml @@ -19,5 +19,5 @@ spec: # Use the same static IP as used for Prometheus. externalIPs: # Use the same IP as above, since we're on a different port. 
- - 35.185.76.159 + - {{EXTERNAL_IP}} type: ClusterIP diff --git a/k8s/mlab-staging/prometheus-federation/services/blackbox-public-service.yml b/k8s/prometheus-federation/services/blackbox-public-service.yml similarity index 96% rename from k8s/mlab-staging/prometheus-federation/services/blackbox-public-service.yml rename to k8s/prometheus-federation/services/blackbox-public-service.yml index 2e622141..3de22454 100644 --- a/k8s/mlab-staging/prometheus-federation/services/blackbox-public-service.yml +++ b/k8s/prometheus-federation/services/blackbox-public-service.yml @@ -19,5 +19,5 @@ spec: # Use the same static IP as used for Prometheus. externalIPs: # Use the same IP as above, since we're on a different port. - - 35.185.76.159 + - {{EXTERNAL_IP}} type: ClusterIP diff --git a/k8s/mlab-oti/prometheus-federation/services/github-receiver-public-service.yml b/k8s/prometheus-federation/services/github-receiver-public-service.yml similarity index 95% rename from k8s/mlab-oti/prometheus-federation/services/github-receiver-public-service.yml rename to k8s/prometheus-federation/services/github-receiver-public-service.yml index 50d6f7f1..93a0d869 100644 --- a/k8s/mlab-oti/prometheus-federation/services/github-receiver-public-service.yml +++ b/k8s/prometheus-federation/services/github-receiver-public-service.yml @@ -15,5 +15,5 @@ spec: sessionAffinity: None # Allocate a static IP manually in GCP console: Networking -> Load Balancing. 
externalIPs: - - 35.184.81.106 + - {{EXTERNAL_IP}} type: ClusterIP diff --git a/k8s/mlab-oti/prometheus-federation/services/grafana-public-service.yml b/k8s/prometheus-federation/services/grafana-public-service.yml similarity index 96% rename from k8s/mlab-oti/prometheus-federation/services/grafana-public-service.yml rename to k8s/prometheus-federation/services/grafana-public-service.yml index 30108c78..7127235a 100644 --- a/k8s/mlab-oti/prometheus-federation/services/grafana-public-service.yml +++ b/k8s/prometheus-federation/services/grafana-public-service.yml @@ -19,5 +19,5 @@ spec: # Use the same static IP as used for Prometheus. externalIPs: # Use the same IP as above, since we're on a different port. - - 35.184.81.106 + - {{EXTERNAL_IP}} type: ClusterIP diff --git a/k8s/mlab-sandbox/prometheus-federation/services/prometheus-public-service.yml b/k8s/prometheus-federation/services/prometheus-public-service.yml similarity index 95% rename from k8s/mlab-sandbox/prometheus-federation/services/prometheus-public-service.yml rename to k8s/prometheus-federation/services/prometheus-public-service.yml index a42cb48d..2669b31a 100644 --- a/k8s/mlab-sandbox/prometheus-federation/services/prometheus-public-service.yml +++ b/k8s/prometheus-federation/services/prometheus-public-service.yml @@ -15,5 +15,5 @@ spec: sessionAffinity: None # Allocate a static IP manually in GCP console: Networking -> Load Balancing. 
externalIPs: - - 35.184.166.181 + - {{EXTERNAL_IP}} type: ClusterIP diff --git a/k8s/mlab-staging/prometheus-federation/services/pushgateway-public-service.yml b/k8s/prometheus-federation/services/pushgateway-public-service.yml similarity index 95% rename from k8s/mlab-staging/prometheus-federation/services/pushgateway-public-service.yml rename to k8s/prometheus-federation/services/pushgateway-public-service.yml index 02b3bc05..7e60e6d4 100644 --- a/k8s/mlab-staging/prometheus-federation/services/pushgateway-public-service.yml +++ b/k8s/prometheus-federation/services/pushgateway-public-service.yml @@ -16,5 +16,5 @@ spec: # Use the same static IP as used for Prometheus. externalIPs: # Use the same IP as above, since we're on a different port. - - 35.185.76.159 + - {{EXTERNAL_IP}} type: ClusterIP