Skip to content

Commit

Permalink
Add k8s_cluster_name to resource attrs (#976)
Browse files Browse the repository at this point in the history
  • Loading branch information
marctc authored Jul 2, 2024
1 parent bdd38fa commit b51cc68
Show file tree
Hide file tree
Showing 23 changed files with 85 additions and 44 deletions.
1 change: 1 addition & 0 deletions docs/sources/configure/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,7 @@ and metrics with the Standard OpenTelemetry labels:
- `k8s.pod.name`
- `k8s.pod.uid`
- `k8s.pod.start_time`
- `k8s.cluster.name`

In YAML, this section is named `kubernetes`, and is located under the
`attributes` top-level section. For example:
Expand Down
1 change: 1 addition & 0 deletions docs/sources/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ In order to configure which attributes to show or which attributes to hide, chec
| Application (all) | `k8s.pod.uid` | shown if `attributes.kubernetes.enable` |
| Application (all) | `k8s.replicaset.name` | shown if `attributes.kubernetes.enable` |
| Application (all) | `k8s.statefulset.name` | shown if `attributes.kubernetes.enable` |
| Application (all) | `k8s.cluster.name` | shown if `attributes.kubernetes.enable` |
| Application (all) | `service.name` | shown |
| Application (all) | `service.namespace` | shown |
| Application (all) | `target.instance` | shown |
Expand Down
1 change: 1 addition & 0 deletions docs/sources/setup/kubernetes.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Beyla can decorate your traces with the following Kubernetes labels:
- `k8s.pod.name`
- `k8s.pod.uid`
- `k8s.pod.start_time`
- `k8s.cluster.name`

To enable metadata decoration, you need to:

Expand Down
1 change: 1 addition & 0 deletions pkg/internal/export/attributes/attr_defs.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ func getDefinitions(groups AttrGroups) map[Section]AttrReportGroup {
attr.K8sNodeName: true,
attr.K8sPodUID: true,
attr.K8sPodStartTime: true,
attr.K8sClusterName: true,
},
}

Expand Down
4 changes: 3 additions & 1 deletion pkg/internal/export/prom/prom.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ const (
k8sNodeName = "k8s_node_name"
k8sPodUID = "k8s_pod_uid"
k8sPodStartTime = "k8s_pod_start_time"
k8sClusterName = "k8s_cluster_name"

spanNameKey = "span_name"
statusCodeKey = "status_code"
Expand Down Expand Up @@ -646,7 +647,7 @@ func (r *metricsReporter) observe(span *request.Span) {

func appendK8sLabelNames(names []string) []string {
names = append(names, k8sNamespaceName, k8sPodName, k8sNodeName, k8sPodUID, k8sPodStartTime,
k8sDeploymentName, k8sReplicaSetName, k8sStatefulSetName, k8sDaemonSetName)
k8sDeploymentName, k8sReplicaSetName, k8sStatefulSetName, k8sDaemonSetName, k8sClusterName)
return names
}

Expand All @@ -662,6 +663,7 @@ func appendK8sLabelValuesService(values []string, service svc.ID) []string {
service.Metadata[(attr.K8sReplicaSetName)],
service.Metadata[(attr.K8sStatefulSetName)],
service.Metadata[(attr.K8sDaemonSetName)],
service.Metadata[(attr.K8sClusterName)],
)
return values
}
Expand Down
33 changes: 1 addition & 32 deletions pkg/internal/netolly/transform/k8s/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (
"context"
"fmt"
"log/slog"
"time"

"github.com/hashicorp/golang-lru/v2/simplelru"
"github.com/mariomac/pipes/pipe"
Expand All @@ -46,10 +45,6 @@ const (
)

const alreadyLoggedIPsCacheLen = 256
const (
clusterMetadataRetries = 5
clusterMetadataFailRetryTime = 500 * time.Millisecond
)

func log() *slog.Logger { return slog.With("component", "k8s.MetadataDecorator") }

Expand Down Expand Up @@ -163,7 +158,7 @@ func (n *decorator) decorate(flow *ebpf.Record, prefix, ip string) bool {
func newDecorator(ctx context.Context, cfg *transform.KubernetesDecorator, meta *kube.Metadata) (*decorator, error) {
nt := decorator{
log: log(),
clusterName: kubeClusterName(ctx, cfg),
clusterName: transform.KubeClusterName(ctx, cfg),
kube: meta,
}
if nt.log.Enabled(ctx, slog.LevelDebug) {
Expand All @@ -175,29 +170,3 @@ func newDecorator(ctx context.Context, cfg *transform.KubernetesDecorator, meta
}
return &nt, nil
}

func kubeClusterName(ctx context.Context, cfg *transform.KubernetesDecorator) string {
log := log().With("func", "kubeClusterName")
if cfg.ClusterName != "" {
return cfg.ClusterName
}
retries := 0
for retries < clusterMetadataRetries {
if clusterName := fetchClusterName(ctx); clusterName != "" {
return clusterName
}
retries++
log.Debug("retrying cluster name fetching in 500 ms...")
select {
case <-ctx.Done():
log.Debug("context canceled before starting the kubernetes decorator node")
return ""
case <-time.After(clusterMetadataFailRetryTime):
// retry or end!
}
}
log.Warn("can't fetch Kubernetes Cluster Name." +
" Network metrics won't contain k8s.cluster.name attribute unless you explicitly set " +
" the BEYLA_KUBE_CLUSTER_NAME environment variable")
return ""
}
2 changes: 1 addition & 1 deletion pkg/internal/pipe/instrumenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ func newGraphBuilder(ctx context.Context, config *beyla.Config, ctxInfo *global.
}))

pipe.AddMiddleProvider(gnb, router, transform.RoutesProvider(config.Routes))
pipe.AddMiddleProvider(gnb, kubernetes, transform.KubeDecoratorProvider(ctxInfo))
pipe.AddMiddleProvider(gnb, kubernetes, transform.KubeDecoratorProvider(ctx, &config.Attributes.Kubernetes, ctxInfo))
pipe.AddMiddleProvider(gnb, nameResolver, transform.NameResolutionProvider(gb.ctxInfo, config.NameResolver))
pipe.AddMiddleProvider(gnb, attrFilter, filter.ByAttribute(config.Filters.Application, spanPtrPromGetters))
config.Metrics.Grafana = &gb.config.Grafana.OTLP
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// https://github.com/DataDog/datadog-agent,
// published under Apache License 2.0

package k8s
package transform

import (
"bytes"
Expand Down Expand Up @@ -43,7 +43,7 @@ type clusterNameFetcher func(context.Context) (string, error)
// cloud providers: EC2, GCP, Azure.
// TODO: consider other providers (Alibaba, Oracle, etc...)
func fetchClusterName(ctx context.Context) string {
log := log().With("func", "fetchClusterName")
log := klog().With("func", "fetchClusterName")
var clusterNameFetchers = map[string]clusterNameFetcher{
"EC2": ec2ClusterNameFetcher,
"GCP": gcpClusterNameFetcher,
Expand Down
48 changes: 43 additions & 5 deletions pkg/transform/k8s.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package transform

import (
"context"
"log/slog"
"time"

Expand Down Expand Up @@ -34,13 +35,22 @@ type KubernetesDecorator struct {
DropExternal bool `yaml:"drop_external" env:"BEYLA_NETWORK_DROP_EXTERNAL"`
}

func KubeDecoratorProvider(ctxInfo *global.ContextInfo) pipe.MiddleProvider[[]request.Span, []request.Span] {
const (
clusterMetadataRetries = 5
clusterMetadataFailRetryTime = 500 * time.Millisecond
)

func KubeDecoratorProvider(
ctx context.Context,
cfg *KubernetesDecorator,
ctxInfo *global.ContextInfo,
) pipe.MiddleProvider[[]request.Span, []request.Span] {
return func() (pipe.MiddleFunc[[]request.Span, []request.Span], error) {
if !ctxInfo.K8sInformer.IsKubeEnabled() {
// if kubernetes decoration is disabled, we just bypass the node
return pipe.Bypass[[]request.Span](), nil
}
decorator := &metadataDecorator{db: ctxInfo.AppO11y.K8sDatabase}
decorator := &metadataDecorator{db: ctxInfo.AppO11y.K8sDatabase, clusterName: KubeClusterName(ctx, cfg)}
return decorator.nodeLoop, nil
}
}
Expand All @@ -52,7 +62,8 @@ type kubeDatabase interface {
}

type metadataDecorator struct {
db kubeDatabase
db kubeDatabase
clusterName string
}

func (md *metadataDecorator) nodeLoop(in <-chan []request.Span, out chan<- []request.Span) {
Expand All @@ -69,7 +80,7 @@ func (md *metadataDecorator) nodeLoop(in <-chan []request.Span, out chan<- []req

func (md *metadataDecorator) do(span *request.Span) {
if podInfo, ok := md.db.OwnerPodInfo(span.Pid.Namespace); ok {
appendMetadata(span, podInfo)
md.appendMetadata(span, podInfo)
} else {
// do not leave the service attributes map as nil
span.ServiceID.Metadata = map[attr.Name]string{}
Expand All @@ -83,7 +94,7 @@ func (md *metadataDecorator) do(span *request.Span) {
}
}

func appendMetadata(span *request.Span, info *kube.PodInfo) {
func (md *metadataDecorator) appendMetadata(span *request.Span, info *kube.PodInfo) {
// If the user has not defined criteria values for the reported
// service name and namespace, we will automatically set it from
// the kubernetes metadata
Expand All @@ -103,6 +114,7 @@ func appendMetadata(span *request.Span, info *kube.PodInfo) {
attr.K8sNodeName: info.NodeName,
attr.K8sPodUID: string(info.UID),
attr.K8sPodStartTime: info.StartTimeStr,
attr.K8sClusterName: md.clusterName,
}
owner := info.Owner
for owner != nil {
Expand All @@ -112,3 +124,29 @@ func appendMetadata(span *request.Span, info *kube.PodInfo) {
// override hostname by the Pod name
span.ServiceID.HostName = info.Name
}

func KubeClusterName(ctx context.Context, cfg *KubernetesDecorator) string {
log := klog().With("func", "KubeClusterName")
if cfg.ClusterName != "" {
return cfg.ClusterName
}
retries := 0
for retries < clusterMetadataRetries {
if clusterName := fetchClusterName(ctx); clusterName != "" {
return clusterName
}
retries++
log.Debug("retrying cluster name fetching in 500 ms...")
select {
case <-ctx.Done():
log.Debug("context canceled before starting the kubernetes decorator node")
return ""
case <-time.After(clusterMetadataFailRetryTime):
// retry or end!
}
}
log.Warn("can't fetch Kubernetes Cluster Name." +
" Network metrics won't contain k8s.cluster.name attribute unless you explicitly set " +
" the BEYLA_KUBE_CLUSTER_NAME environment variable")
return ""
}
6 changes: 5 additions & 1 deletion pkg/transform/k8s_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func TestDecoration(t *testing.T) {
NodeName: "the-node",
StartTimeStr: "2020-01-02 12:56:56",
},
}}}
}}, clusterName: "the-cluster"}
inputCh, outputhCh := make(chan []request.Span, 10), make(chan []request.Span, 10)
defer close(inputCh)
go dec.nodeLoop(inputCh, outputhCh)
Expand All @@ -63,6 +63,7 @@ func TestDecoration(t *testing.T) {
"k8s.pod.uid": "uid-12",
"k8s.deployment.name": "deployment-12",
"k8s.pod.start_time": "2020-01-02 12:12:56",
"k8s.cluster.name": "the-cluster",
}, deco[0].ServiceID.Metadata)
})
t.Run("pod info without deployment should set replicaset as name", func(t *testing.T) {
Expand All @@ -80,6 +81,7 @@ func TestDecoration(t *testing.T) {
"k8s.pod.name": "pod-34",
"k8s.pod.uid": "uid-34",
"k8s.pod.start_time": "2020-01-02 12:34:56",
"k8s.cluster.name": "the-cluster",
}, deco[0].ServiceID.Metadata)
})
t.Run("pod info with only pod name should set pod name as name", func(t *testing.T) {
Expand All @@ -96,6 +98,7 @@ func TestDecoration(t *testing.T) {
"k8s.pod.name": "the-pod",
"k8s.pod.uid": "uid-56",
"k8s.pod.start_time": "2020-01-02 12:56:56",
"k8s.cluster.name": "the-cluster",
}, deco[0].ServiceID.Metadata)
})
t.Run("process without pod Info won't be decorated", func(t *testing.T) {
Expand Down Expand Up @@ -123,6 +126,7 @@ func TestDecoration(t *testing.T) {
"k8s.pod.uid": "uid-12",
"k8s.deployment.name": "deployment-12",
"k8s.pod.start_time": "2020-01-02 12:12:56",
"k8s.cluster.name": "the-cluster",
}, deco[0].ServiceID.Metadata)
})
}
Expand Down
4 changes: 3 additions & 1 deletion test/integration/configs/instrumenter-config-promscrape.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@ attributes:
process_disk_io:
include: ["*"]
process_network_io:
include: ["*"]
include: ["*"]
kubernetes:
cluster_name: beyla
3 changes: 3 additions & 0 deletions test/integration/configs/instrumenter-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ otel_metrics_export:
endpoint: http://otelcol:4318
otel_traces_export:
endpoint: http://jaeger:4318
attributes:
kubernetes:
cluster_name: beyla
5 changes: 5 additions & 0 deletions test/integration/k8s/common/k8s_metrics_testfuncs.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ func FeatureHTTPMetricsDecoration(manifest string) features.Feature {
"k8s_node_name": ".+-control-plane$",
"k8s_pod_uid": UUIDRegex,
"k8s_pod_start_time": TimeRegex,
"k8s_cluster_name": "^beyla$",
}, "k8s_deployment_name")).
Assess("all the server metrics are properly decorated",
testMetricsDecoration(httpServerMetrics, `{url_path="/iping",k8s_pod_name=~"testserver-.*"}`, map[string]string{
Expand All @@ -120,6 +121,7 @@ func FeatureHTTPMetricsDecoration(manifest string) features.Feature {
"k8s_pod_start_time": TimeRegex,
"k8s_deployment_name": "^testserver$",
"k8s_replicaset_name": "^testserver-",
"k8s_cluster_name": "^beyla$",
})).
Assess("all the span graph metrics exist",
testMetricsDecoration(spanGraphMetrics, `{server="testserver",client="internal-pinger"}`, map[string]string{
Expand All @@ -146,6 +148,7 @@ func FeatureGRPCMetricsDecoration(manifest string) features.Feature {
"k8s_node_name": ".+-control-plane$",
"k8s_pod_uid": UUIDRegex,
"k8s_pod_start_time": TimeRegex,
"k8s_cluster_name": "^beyla$",
}, "k8s_deployment_name")).
Assess("all the server metrics are properly decorated",
testMetricsDecoration(grpcServerMetrics, `{k8s_pod_name=~"testserver-.*"}`, map[string]string{
Expand All @@ -155,6 +158,7 @@ func FeatureGRPCMetricsDecoration(manifest string) features.Feature {
"k8s_pod_start_time": TimeRegex,
"k8s_deployment_name": "^testserver$",
"k8s_replicaset_name": "^testserver-",
"k8s_cluster_name": "^beyla$",
}),
).Feature()
}
Expand All @@ -168,6 +172,7 @@ func FeatureProcessMetricsDecoration(overrideProperties map[string]string) featu
"k8s_pod_start_time": TimeRegex,
"k8s_deployment_name": "^testserver$",
"k8s_replicaset_name": "^testserver-",
"k8s_cluster_name": "^beyla$",
}
for k, v := range overrideProperties {
properties[k] = v
Expand Down
2 changes: 2 additions & 0 deletions test/integration/k8s/daemonset/k8s_daemonset_traces_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ func TestBasicTracing(t *testing.T) {
{Key: "k8s.pod.start_time", Type: "string", Value: k8s.TimeRegex},
{Key: "k8s.deployment.name", Type: "string", Value: "^otherinstance"},
{Key: "k8s.namespace.name", Type: "string", Value: "^default$"},
{Key: "k8s.cluster.name", Type: "string", Value: "^beyla$"},
}, trace.Processes[parent.ProcessID].Tags)
require.Empty(t, sd)

Expand Down Expand Up @@ -144,6 +145,7 @@ func TestBasicTracing(t *testing.T) {
{Key: "k8s.pod.start_time", Type: "string", Value: k8s.TimeRegex},
{Key: "k8s.deployment.name", Type: "string", Value: "^otherinstance"},
{Key: "k8s.namespace.name", Type: "string", Value: "^default$"},
{Key: "k8s.cluster.name", Type: "string", Value: "^beyla$"},
}, trace.Processes[parent.ProcessID].Tags)
require.Empty(t, sd)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func TestPythonBasicTracing(t *testing.T) {
{Key: "k8s.pod.uid", Type: "string", Value: k8s.UUIDRegex},
{Key: "k8s.pod.start_time", Type: "string", Value: k8s.TimeRegex},
{Key: "k8s.namespace.name", Type: "string", Value: "^default$"},
{Key: "k8s.cluster.name", Type: "string", Value: "^beyla$"},
}, trace.Processes[parent.ProcessID].Tags)
require.Empty(t, sd, sd.String())

Expand Down Expand Up @@ -118,6 +119,7 @@ func TestPythonBasicTracing(t *testing.T) {
{Key: "k8s.pod.uid", Type: "string", Value: k8s.UUIDRegex},
{Key: "k8s.pod.start_time", Type: "string", Value: k8s.TimeRegex},
{Key: "k8s.namespace.name", Type: "string", Value: "^default$"},
{Key: "k8s.cluster.name", Type: "string", Value: "^beyla$"},
}, trace.Processes[parent.ProcessID].Tags)
require.Empty(t, sd, sd.String())

Expand Down
1 change: 1 addition & 0 deletions test/integration/k8s/manifests/00-kind.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
apiVersion: kind.x-k8s.io/v1alpha4
kind: Cluster
name: beyla
nodes:
- role: control-plane
extraMounts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,6 @@ spec:
value: "autodetect"
- name: BEYLA_OTEL_METRIC_FEATURES
value: "application,application_span,application_service_graph,application_process"
- name: BEYLA_KUBE_CLUSTER_NAME
value: "beyla"

Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ spec:
value: "application,application_span,application_service_graph,application_process"
- name: BEYLA_OTEL_METRICS_TTL
value: "30m0s"
- name: BEYLA_KUBE_CLUSTER_NAME
value: "beyla"
ports:
- containerPort: 8999
hostPort: 8999
Expand Down
Loading

0 comments on commit b51cc68

Please sign in to comment.