diff --git a/internal/aws/containerinsight/const.go b/internal/aws/containerinsight/const.go index ee8764f1364e..8d837aac96da 100644 --- a/internal/aws/containerinsight/const.go +++ b/internal/aws/containerinsight/const.go @@ -93,6 +93,10 @@ const ( StatusConditionNetworkUnavailable = "status_condition_network_unavailable" StatusCapacityPods = "status_capacity_pods" StatusAllocatablePods = "status_allocatable_pods" + StatusRunning = "status_running" + StatusTerminated = "status_terminated" + StatusWaiting = "status_waiting" + StatusWaitingReasonCrashed = "status_waiting_reason_crashed" RunningPodCount = "number_of_running_pods" RunningContainerCount = "number_of_running_containers" @@ -218,6 +222,12 @@ func init() { StatusCapacityPods: UnitCount, StatusAllocatablePods: UnitCount, + // kube-state-metrics equivalents + StatusRunning: UnitCount, + StatusTerminated: UnitCount, + StatusWaiting: UnitCount, + StatusWaitingReasonCrashed: UnitCount, + // cluster metrics NodeCount: UnitCount, FailedNodeCount: UnitCount, diff --git a/receiver/awscontainerinsightreceiver/README.md b/receiver/awscontainerinsightreceiver/README.md index 32302aa645a8..ce1b1e93c797 100644 --- a/receiver/awscontainerinsightreceiver/README.md +++ b/receiver/awscontainerinsightreceiver/README.md @@ -646,30 +646,34 @@ kubectl apply -f config.yaml ### Container -| Metric | Unit | -|-----------------------------------------|---------------| -| container_cpu_limit | Millicore | -| container_cpu_request | Millicore | -| container_cpu_usage_system | Millicore | -| container_cpu_usage_total | Millicore | -| container_cpu_usage_user | Millicore | -| container_cpu_utilization | Percent | -| container_memory_cache | Bytes | -| container_memory_failcnt | Count | -| container_memory_hierarchical_pgfault | Count/Second | -| container_memory_hierarchical_pgmajfault| Count/Second | -| container_memory_limit | Bytes | -| container_memory_mapped_file | Bytes | -| container_memory_max_usage | Bytes | -| 
container_memory_pgfault | Count/Second | -| container_memory_pgmajfault | Count/Second | -| container_memory_request | Bytes | -| container_memory_rss | Bytes | -| container_memory_swap | Bytes | -| container_memory_usage | Bytes | -| container_memory_utilization | Percent | -| container_memory_working_set | Bytes | -| number_of_container_restarts | Count | +| Metric | Unit | +|------------------------------------------|--------------| +| container_cpu_limit | Millicore | +| container_cpu_request | Millicore | +| container_cpu_usage_system | Millicore | +| container_cpu_usage_total | Millicore | +| container_cpu_usage_user | Millicore | +| container_cpu_utilization | Percent | +| container_memory_cache | Bytes | +| container_memory_failcnt | Count | +| container_memory_hierarchical_pgfault | Count/Second | +| container_memory_hierarchical_pgmajfault | Count/Second | +| container_memory_limit | Bytes | +| container_memory_mapped_file | Bytes | +| container_memory_max_usage | Bytes | +| container_memory_pgfault | Count/Second | +| container_memory_pgmajfault | Count/Second | +| container_memory_request | Bytes | +| container_memory_rss | Bytes | +| container_memory_swap | Bytes | +| container_memory_usage | Bytes | +| container_memory_utilization | Percent | +| container_memory_working_set | Bytes | +| number_of_container_restarts | Count | +| container_status_running | Count | +| container_status_terminated | Count | +| container_status_waiting | Count | +| container_status_waiting_reason_crashed | Count |

diff --git a/receiver/awscontainerinsightreceiver/internal/stores/podstore.go b/receiver/awscontainerinsightreceiver/internal/stores/podstore.go index 039b6c207742..4ca266d987f4 100644 --- a/receiver/awscontainerinsightreceiver/internal/stores/podstore.go +++ b/receiver/awscontainerinsightreceiver/internal/stores/podstore.go @@ -469,16 +469,28 @@ func (p *PodStore) addStatus(metric CIMetric, pod *corev1.Pod) { if containerName := metric.GetTag(ci.ContainerNamekey); containerName != "" { for _, containerStatus := range pod.Status.ContainerStatuses { if containerStatus.Name == containerName { + possibleStatuses := map[string]int{ + ci.StatusRunning: 0, + ci.StatusWaiting: 0, + ci.StatusWaitingReasonCrashed: 0, + ci.StatusTerminated: 0, + } switch { case containerStatus.State.Running != nil: metric.AddTag(ci.ContainerStatus, "Running") + possibleStatuses[ci.StatusRunning] = 1 case containerStatus.State.Waiting != nil: metric.AddTag(ci.ContainerStatus, "Waiting") + possibleStatuses[ci.StatusWaiting] = 1 if containerStatus.State.Waiting.Reason != "" { metric.AddTag(ci.ContainerStatusReason, containerStatus.State.Waiting.Reason) + if strings.Contains(containerStatus.State.Waiting.Reason, "Crash") { + possibleStatuses[ci.StatusWaitingReasonCrashed] = 1 + } } case containerStatus.State.Terminated != nil: metric.AddTag(ci.ContainerStatus, "Terminated") + possibleStatuses[ci.StatusTerminated] = 1 if containerStatus.State.Terminated.Reason != "" { metric.AddTag(ci.ContainerStatusReason, containerStatus.State.Terminated.Reason) } @@ -500,6 +512,11 @@ func (p *PodStore) addStatus(metric CIMetric, pod *corev1.Pod) { } p.setPrevMeasurement(ci.TypeContainer, containerKey, prevContainerMeasurement{restarts: int(containerStatus.RestartCount)}) } + + // add container status metrics (one 0/1 field per possible state) + for name, val := range possibleStatuses { + metric.AddField(ci.MetricName(ci.TypeContainer, name), val) + } } } } diff --git 
a/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go b/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go index 1ee568a3d0f1..32c695439971 100644 --- a/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go +++ b/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go @@ -310,6 +310,9 @@ func TestPodStore_addStatus(t *testing.T) { assert.Equal(t, "Running", metric.GetTag(ci.ContainerStatus)) val = metric.GetField(ci.ContainerRestartCount) assert.Nil(t, val) + val = metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusRunning)) + assert.NotNil(t, val) + assert.Equal(t, 1, val) pod.Status.ContainerStatuses[0].State.Running = nil pod.Status.ContainerStatuses[0].State.Terminated = &corev1.ContainerStateTerminated{} @@ -331,6 +334,28 @@ func TestPodStore_addStatus(t *testing.T) { assert.Equal(t, "Terminated", metric.GetTag(ci.ContainerStatus)) assert.Equal(t, "OOMKilled", metric.GetTag(ci.ContainerLastTerminationReason)) assert.Equal(t, int(1), metric.GetField(ci.ContainerRestartCount).(int)) + assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusTerminated))) + + pod.Status.ContainerStatuses[0].State.Terminated = nil + pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "CrashLoopBackOff"} + + tags = map[string]string{ci.MetricType: ci.TypeContainer, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit", ci.ContainerNamekey: "ubuntu"} + metric = generateMetric(fields, tags) + + podStore.addStatus(metric, pod) + assert.Equal(t, "Waiting", metric.GetTag(ci.ContainerStatus)) + assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaiting))) + assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonCrashed))) + + pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "SomeOtherReason"} + + tags = map[string]string{ci.MetricType: ci.TypeContainer, 
ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit", ci.ContainerNamekey: "ubuntu"} + metric = generateMetric(fields, tags) + + podStore.addStatus(metric, pod) + assert.Equal(t, "Waiting", metric.GetTag(ci.ContainerStatus)) + assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaiting))) + assert.Equal(t, 0, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonCrashed))) // test delta of restartCount pod.Status.ContainerStatuses[0].RestartCount = 3