Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch k8s metrics (container, pod, node, namespace) to use pdata. #23423

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .chloggen/switchk8spodnamespacenode.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern (e.g. filelogreceiver)
component: k8sclusterreceiver

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Switch k8s metrics (container, pod, node, namespace) to use pdata.

# One or more tracking issues related to the change
issues: [23423]
6 changes: 5 additions & 1 deletion receiver/k8sclusterreceiver/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ func TestE2E(t *testing.T) {
return value
}
containerImageShorten := func(value string) string {
return value[strings.LastIndex(value, "/"):]
index := strings.LastIndex(value, "/")
if index == -1 {
return value
}
return value[index:]
}
require.NoError(t, pmetrictest.CompareMetrics(expected, metricsConsumer.AllMetrics()[len(metricsConsumer.AllMetrics())-1],
pmetrictest.IgnoreTimestamp(),
Expand Down
1 change: 0 additions & 1 deletion receiver/k8sclusterreceiver/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ require (
github.com/census-instrumentation/opencensus-proto v0.4.1
github.com/google/go-cmp v0.5.9
github.com/google/uuid v1.3.0
github.com/iancoleman/strcase v0.2.0
github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.79.0
github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal v0.79.0
github.com/open-telemetry/opentelemetry-collector-contrib/internal/k8sconfig v0.79.0
Expand Down
2 changes: 0 additions & 2 deletions receiver/k8sclusterreceiver/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,11 @@ func (dc *DataCollector) SyncMetrics(obj interface{}) {

switch o := obj.(type) {
case *corev1.Pod:
md = ocsToMetrics(pod.GetMetrics(o, dc.settings.TelemetrySettings.Logger))
md = pod.GetMetrics(dc.settings, o)
case *corev1.Node:
md = ocsToMetrics(node.GetMetrics(o, dc.nodeConditionsToReport, dc.allocatableTypesToReport, dc.settings.TelemetrySettings.Logger))
md = node.GetMetrics(dc.settings, o, dc.nodeConditionsToReport, dc.allocatableTypesToReport)
case *corev1.Namespace:
md = ocsToMetrics(namespace.GetMetrics(o))
md = namespace.GetMetrics(dc.settings, o)
case *corev1.ReplicationController:
md = ocsToMetrics(replicationcontroller.GetMetrics(o))
case *corev1.ResourceQuota:
Expand Down
161 changes: 40 additions & 121 deletions receiver/k8sclusterreceiver/internal/container/containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,17 @@
package container // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/container"

import (
"fmt"
"time"

metricspb "github.com/census-instrumentation/opencensus-proto/gen-go/metrics/v1"
resourcepb "github.com/census-instrumentation/opencensus-proto/gen-go/resource/v1"
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/pmetric"
"go.opentelemetry.io/collector/receiver"
conventions "go.opentelemetry.io/collector/semconv/v1.6.1"
"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"

"github.com/open-telemetry/opentelemetry-collector-contrib/internal/common/docker"
"github.com/open-telemetry/opentelemetry-collector-contrib/internal/common/maps"
metadataPkg "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/experimentalmetricmetadata"
"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/constants"
imetadata "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/container/internal/metadata"
"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/metadata"
"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/utils"
)
Expand All @@ -31,127 +30,40 @@ const (
containerStatusTerminated = "terminated"
)

// containerRestartMetric is the descriptor for the "k8s.container.restarts"
// metric: an int64 gauge with unit "1". The description warns that the value
// comes straight from the K8s API and may reset to 0, so it should be read
// as "== 0" vs "> 0" rather than as an exact count.
var containerRestartMetric = &metricspb.MetricDescriptor{
Name: "k8s.container.restarts",
Description: "How many times the container has restarted in the recent past. " +
"This value is pulled directly from the K8s API and the value can go indefinitely high" +
" and be reset to 0 at any time depending on how your kubelet is configured to prune" +
" dead containers. It is best to not depend too much on the exact value but rather look" +
" at it as either == 0, in which case you can conclude there were no restarts in the recent" +
" past, or > 0, in which case you can conclude there were restarts in the recent past, and" +
" not try and analyze the value beyond that.",
Unit: "1",
Type: metricspb.MetricDescriptor_GAUGE_INT64,
}

// containerReadyMetric is the descriptor for the "k8s.container.ready"
// metric: an int64 gauge reporting 0 (not ready) or 1 (ready).
// Note that, unlike containerRestartMetric, no Unit is set here.
var containerReadyMetric = &metricspb.MetricDescriptor{
Name: "k8s.container.ready",
Description: "Whether a container has passed its readiness probe (0 for no, 1 for yes)",
Type: metricspb.MetricDescriptor_GAUGE_INT64,
}

// GetStatusMetrics builds the two status gauges for a single container:
// its restart count and its readiness flag (encoded as 0 or 1).
// The returned slice always holds exactly two metrics, restarts first.
func GetStatusMetrics(cs corev1.ContainerStatus) []*metricspb.Metric {
	restarts := &metricspb.Metric{
		MetricDescriptor: containerRestartMetric,
		Timeseries: []*metricspb.TimeSeries{
			utils.GetInt64TimeSeries(int64(cs.RestartCount)),
		},
	}

	ready := &metricspb.Metric{
		MetricDescriptor: containerReadyMetric,
		Timeseries: []*metricspb.TimeSeries{
			utils.GetInt64TimeSeries(boolToInt64(cs.Ready)),
		},
	}

	return []*metricspb.Metric{restarts, ready}
}

// boolToInt64 maps a boolean onto the integer form used for gauge values:
// true becomes 1 and false becomes 0.
func boolToInt64(b bool) int64 {
	var v int64
	if b {
		v = 1
	}
	return v
}

// GetSpecMetrics metricizes values from the container spec.
// This includes values like resource requests and limits.
func GetSpecMetrics(c corev1.Container) []*metricspb.Metric {
var metrics []*metricspb.Metric

for _, t := range []struct {
typ string
description string
rl corev1.ResourceList
}{
{
"request",
"Resource requested for the container. " +
"See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#resourcerequirements-v1-core for details",
c.Resources.Requests,
},
{
"limit",
"Maximum resource limit set for the container. " +
"See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#resourcerequirements-v1-core for details",
c.Resources.Limits,
},
} {
for k, v := range t.rl {
val := utils.GetInt64TimeSeries(v.Value())
valType := metricspb.MetricDescriptor_GAUGE_INT64
if k == corev1.ResourceCPU {
// cpu metrics must be of the double type to adhere to opentelemetry system.cpu metric specifications
valType = metricspb.MetricDescriptor_GAUGE_DOUBLE
val = utils.GetDoubleTimeSeries(float64(v.MilliValue()) / 1000.0)
}
metrics = append(metrics,
&metricspb.Metric{
MetricDescriptor: &metricspb.MetricDescriptor{
Name: fmt.Sprintf("k8s.container.%s_%s", k, t.typ),
Description: t.description,
Type: valType,
},
Timeseries: []*metricspb.TimeSeries{
val,
},
},
)
func GetSpecMetrics(set receiver.CreateSettings, c corev1.Container, pod *corev1.Pod) pmetric.Metrics {
mb := imetadata.NewMetricsBuilder(imetadata.DefaultMetricsBuilderConfig(), set)
ts := pcommon.NewTimestampFromTime(time.Now())
mb.RecordK8sContainerCPURequestDataPoint(ts, float64(c.Resources.Requests.Cpu().MilliValue())/1000.0)
mb.RecordK8sContainerCPULimitDataPoint(ts, float64(c.Resources.Limits.Cpu().MilliValue())/1000.0)
Comment on lines +38 to +39
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The k8s.container.memory_request and k8s.container.memory_limit metrics are now missing.

for _, cs := range pod.Status.ContainerStatuses {
if cs.Name == c.Name {
mb.RecordK8sContainerRestartsDataPoint(ts, int64(cs.RestartCount))
mb.RecordK8sContainerReadyDataPoint(ts, boolToInt64(cs.Ready))
break
}
}

return metrics
}

// GetResource returns a proto resource describing a container: its Type is
// constants.ContainerType and its Labels field is the supplied map, stored
// as-is (the map is not copied).
func GetResource(labels map[string]string) *resourcepb.Resource {
return &resourcepb.Resource{
Type: constants.ContainerType,
Labels: labels,
}
}

// GetAllLabels returns all container labels, including ones from
// the pod in which the container is running.
func GetAllLabels(cs corev1.ContainerStatus,
dims map[string]string, logger *zap.Logger) map[string]string {

image, err := docker.ParseImageName(cs.Image)
image, err := docker.ParseImageName(c.Image)
if err != nil {
docker.LogParseError(err, cs.Image, logger)
docker.LogParseError(err, c.Image, set.Logger)
}

out := maps.CloneStringMap(dims)

out[conventions.AttributeContainerID] = utils.StripContainerID(cs.ContainerID)
out[conventions.AttributeK8SContainerName] = cs.Name
out[conventions.AttributeContainerImageName] = image.Repository
out[conventions.AttributeContainerImageTag] = image.Tag

return out
var containerID string
for _, cs := range pod.Status.ContainerStatuses {
if cs.Name == c.Name {
containerID = cs.ContainerID
}
}
return mb.Emit(imetadata.WithK8sPodUID(string(pod.UID)),
imetadata.WithK8sPodName(pod.Name),
imetadata.WithK8sNodeName(pod.Spec.NodeName),
imetadata.WithK8sNamespaceName(pod.Namespace),
imetadata.WithOpencensusResourcetype("container"),
imetadata.WithContainerID(utils.StripContainerID(containerID)),
imetadata.WithK8sContainerName(c.Name),
imetadata.WithContainerImageName(image.Repository),
imetadata.WithContainerImageTag(image.Tag),
)
}

func GetMetadata(cs corev1.ContainerStatus) *metadata.KubernetesMetadata {
Expand All @@ -177,3 +89,10 @@ func GetMetadata(cs corev1.ContainerStatus) *metadata.KubernetesMetadata {
Metadata: mdata,
}
}

// boolToInt64 converts a readiness-style flag into a data point value,
// yielding 1 when b is true and 0 otherwise.
func boolToInt64(b bool) int64 {
	switch b {
	case true:
		return 1
	default:
		return 0
	}
}
6 changes: 6 additions & 0 deletions receiver/k8sclusterreceiver/internal/container/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

//go:generate mdatagen metadata.yaml

package container // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/container"
59 changes: 59 additions & 0 deletions receiver/k8sclusterreceiver/internal/container/documentation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
[comment]: <> (Code generated by mdatagen. DO NOT EDIT.)

# k8s/container

## Default Metrics

The following metrics are emitted by default. Each of them can be disabled by applying the following configuration:

```yaml
metrics:
<metric_name>:
enabled: false
```

### k8s.container.cpu_limit

Maximum resource limit set for the container. See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#resourcerequirements-v1-core for details

| Unit | Metric Type | Value Type |
| ---- | ----------- | ---------- |
| 1 | Gauge | Double |

### k8s.container.cpu_request

Resource requested for the container. See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#resourcerequirements-v1-core for details

| Unit | Metric Type | Value Type |
| ---- | ----------- | ---------- |
| 1 | Gauge | Double |

### k8s.container.ready

Whether a container has passed its readiness probe (0 for no, 1 for yes)

| Unit | Metric Type | Value Type |
| ---- | ----------- | ---------- |
| 1 | Gauge | Int |

### k8s.container.restarts

How many times the container has restarted in the recent past. This value is pulled directly from the K8s API and the value can go indefinitely high and be reset to 0 at any time depending on how your kubelet is configured to prune dead containers. It is best to not depend too much on the exact value but rather look at it as either == 0, in which case you can conclude there were no restarts in the recent past, or > 0, in which case you can conclude there were restarts in the recent past, and not try and analyze the value beyond that.

| Unit | Metric Type | Value Type |
| ---- | ----------- | ---------- |
| 1 | Gauge | Int |

## Resource Attributes

| Name | Description | Values | Enabled |
| ---- | ----------- | ------ | ------- |
| container.id | The container id. | Any Str | true |
| container.image.name | The container image name | Any Str | true |
| container.image.tag | The container image tag | Any Str | true |
| k8s.container.name | The k8s container name | Any Str | true |
| k8s.namespace.name | The k8s namespace name | Any Str | true |
| k8s.node.name | The k8s node name | Any Str | true |
| k8s.pod.name | The k8s pod name | Any Str | true |
| k8s.pod.uid | The k8s pod uid | Any Str | true |
| opencensus.resourcetype | The OpenCensus resource type. | Any Str | true |
Loading