Skip to content

Commit

Permalink
updating metadata, adding reason whitelists
Browse files Browse the repository at this point in the history
  • Loading branch information
charlyF committed Nov 16, 2017
1 parent 5b24363 commit f215b58
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 5 deletions.
34 changes: 30 additions & 4 deletions kubernetes_state/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,15 +299,41 @@ def kube_pod_status_phase(self, message, **kwargs):

def kube_pod_container_status_waiting_reason(self, message, **kwargs):
    """Submit one count per container reporting a whitelisted waiting reason.

    For every metric in ``message.metric`` a tag list is built from its
    labels: ``reason`` (only when whitelisted), ``container`` (renamed to
    ``kube_container_name``) and ``namespace``. Any other label is ignored.
    A metric whose ``reason`` label is not whitelisted is skipped; metrics
    without a ``reason`` label are still submitted.

    :param message: object exposing ``metric``, an iterable of entries that
        each carry ``label`` (items with ``name``/``value``) and
        ``gauge.value``.
    :param kwargs: accepted for signature compatibility; unused here.
    """
    metric_name = self.NAMESPACE + '.container.status_report.count.waiting'
    # NOTE(review): this set is rebuilt on every call; hoisting it to a
    # module- or class-level constant would avoid the repeated allocation.
    whitelisted_reasons = {"ErrImagePull"}
    for metric in message.metric:
        # Tags and the skip flag are per-metric state: keeping them outside
        # this loop would leak tags between metrics and let one filtered
        # reason suppress every metric that follows it.
        tags = []
        skip_metric = False
        for label in metric.label:
            if label.name == "reason":
                if label.value not in whitelisted_reasons:
                    # Only this metric is dropped; a plain ``return`` here
                    # would also discard the remaining metrics.
                    skip_metric = True
                    break
                tags.append(self._format_tag(label.name, label.value))
            elif label.name == "container":
                tags.append(self._format_tag("kube_container_name", label.value))
            elif label.name == "namespace":
                tags.append(self._format_tag(label.name, label.value))
        if not skip_metric:
            self.count(metric_name, metric.gauge.value, tags)

def kube_pod_container_status_terminated_reason(self, message, **kwargs):
    """Submit one count per container reporting a whitelisted terminated reason.

    For every metric in ``message.metric`` a tag list is built from its
    labels: ``reason`` (only when whitelisted), ``container`` (renamed to
    ``kube_container_name``) and ``namespace``. Any other label is ignored.
    A metric whose ``reason`` label is not whitelisted is skipped; metrics
    without a ``reason`` label are still submitted.

    :param message: object exposing ``metric``, an iterable of entries that
        each carry ``label`` (items with ``name``/``value``) and
        ``gauge.value``.
    :param kwargs: accepted for signature compatibility; unused here.
    """
    metric_name = self.NAMESPACE + '.container.status_report.count.terminated'
    whitelisted_reasons = {"OOMKilled", "ContainerCannotRun", "Error"}
    for metric in message.metric:
        # Tags and the skip flag are per-metric state: keeping them outside
        # this loop would leak tags between metrics and let one filtered
        # reason suppress every metric that follows it.
        tags = []
        skip_metric = False
        for label in metric.label:
            if label.name == "reason":
                if label.value not in whitelisted_reasons:
                    # Drop only this metric and keep processing the rest.
                    skip_metric = True
                    break
                tags.append(self._format_tag(label.name, label.value))
            elif label.name == "container":
                tags.append(self._format_tag("kube_container_name", label.value))
            elif label.name == "namespace":
                tags.append(self._format_tag(label.name, label.value))
        if not skip_metric:
            self.count(metric_name, metric.gauge.value, tags)

def kube_cronjob_next_schedule_time(self, message, **kwargs):
""" Time until the next schedule """
Expand Down
9 changes: 8 additions & 1 deletion kubernetes_state/ci/fixtures/prometheus/prometheus.txt
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,13 @@ kube_pod_container_status_terminated{container="should-run-once",namespace="defa
kube_pod_container_status_terminated{container="sidecar",namespace="kube-system",pod="kube-dns-1326421443-hj4hx"} 0
kube_pod_container_status_terminated{container="task-pv-container",namespace="default",pod="task-pv-pod"} 0
kube_pod_container_status_terminated{container="tiller",namespace="kube-system",pod="tiller-deploy-1651615695-dcphn"} 0
kube_pod_container_status_terminated{container="container2",namespace="ns2",pod="pod2"} 1
# HELP kube_pod_container_status_terminated_reason Describes the reason the container is currently in terminated state.
# TYPE kube_pod_container_status_terminated_reason gauge
kube_pod_container_status_terminated_reason{container="container2",namespace="ns2",pod="pod2",reason="Completed"} 0
kube_pod_container_status_terminated_reason{container="container2",namespace="ns2",pod="pod2",reason="ContainerCannotRun"} 0
kube_pod_container_status_terminated_reason{container="container2",namespace="ns2",pod="pod2",reason="Error"} 0
kube_pod_container_status_terminated_reason{container="container2",namespace="ns2",pod="pod2",reason="OOMKilled"} 1
# HELP kube_pod_container_status_waiting Describes whether the container is currently in waiting state.
# TYPE kube_pod_container_status_waiting gauge
kube_pod_container_status_waiting{container="dd-k8state",namespace="default",pod="jaundiced-numbat-dd-k8state-b6s77"} 0
Expand Down Expand Up @@ -427,7 +434,7 @@ kube_pod_container_status_waiting_reason{container="sidecar",namespace="kube-sys
kube_pod_container_status_waiting_reason{container="task-pv-container",namespace="default",pod="task-pv-pod",reason="ContainerCreating"} 0
kube_pod_container_status_waiting_reason{container="task-pv-container",namespace="default",pod="task-pv-pod",reason="ErrImagePull"} 0
kube_pod_container_status_waiting_reason{container="tiller",namespace="kube-system",pod="tiller-deploy-1651615695-dcphn",reason="ContainerCreating"} 0
kube_pod_container_status_waiting_reason{container="tiller",namespace="kube-system",pod="tiller-deploy-1651615695-dcphn",reason="ErrImagePull"} 0
kube_pod_container_status_waiting_reason{container="tiller",namespace="kube-system",pod="tiller-deploy-1651615695-dcphn",reason="ErrImagePull"} 1
# HELP kube_pod_created Unix creation timestamp
# TYPE kube_pod_created gauge
kube_pod_created{namespace="default",pod="failingtest-f585bbd4-2fsml"} 1.510059371e+09
Expand Down
2 changes: 2 additions & 0 deletions kubernetes_state/metadata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation
kubernetes_state.container.ready,gauge,,,,Whether the containers readiness check succeeded,0,kubernetes,k8s_state.container.rdy
kubernetes_state.container.running,gauge,,,,Whether the container is currently in running state,0,kubernetes,k8s_state.container.running
kubernetes_state.container.terminated,gauge,,,,Whether the container is currently in terminated state,0,kubernetes,k8s_state.container.term
kubernetes_state.container.status_report.count.terminated,count,,,,Count of the containers currently reporting a terminated state with the reason as a tag,-1,kubernetes,k8s_state.container.status_report.count.term
kubernetes_state.container.waiting,gauge,,,,Whether the container is currently in waiting state,0,kubernetes,k8s_state.container.wait
kubernetes_state.container.status_report.count.waiting,count,,,,Count of the containers currently reporting a waiting state with the reason as a tag,-1,kubernetes,k8s_state.container.status_report.count.wait
kubernetes_state.container.gpu.request,gauge,,,,The number of requested gpu devices by a container,0,kubernetes,k8s_state.container.gpu.request
kubernetes_state.container.gpu.limit,gauge,,,,The limit on gpu devices to be used by a container,0,kubernetes,k8s_state.container.gpu.limit
kubernetes_state.container.restarts,gauge,,,,The number of restarts per container,-1,kubernetes,k8s_state.container.restarts
Expand Down

0 comments on commit f215b58

Please sign in to comment.