Skip to content

Commit

Permalink
feat: add data aggregation level (#556)
Browse files Browse the repository at this point in the history
# Description

This PR introduces a config map option for data aggregation. More
details can be found in the docs.
## Related Issue
#138

## Checklist

- [x] I have read the [contributing
documentation](https://retina.sh/docs/contributing).
- [x] I signed and signed-off the commits (`git commit -S -s ...`). See
[this
documentation](https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification)
on signing commits.
- [x] I have correctly attributed the author(s) of the code.
- [x] I have tested the changes locally.
- [x] I have followed the project's style guidelines.
- [x] I have updated the documentation, if necessary.
- [x] I have added tests, if applicable.

## Screenshots (if applicable) or Testing Completed
- Added unit tests for parsing data aggregation level from config file
- Deploy on cluster with different data aggregation level, verified that
on `high`, `packetparser` doesn't attach bpf program to eth0
## Additional Notes

Add any additional notes or context about the pull request here.

---

Please refer to the [CONTRIBUTING.md](../CONTRIBUTING.md) file for more
information on how to contribute to this project.

---------

Signed-off-by: Quang Nguyen <[email protected]>
  • Loading branch information
nddq authored Aug 6, 2024
1 parent 419175f commit 37d8401
Show file tree
Hide file tree
Showing 25 changed files with 288 additions and 41 deletions.
1 change: 1 addition & 0 deletions cmd/hubble/daemon_main_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ func setupZapLogger(retinaConfig *config.Config, k8sCfg *rest.Config) *log.ZapLo
zap.String("version", buildinfo.Version),
zap.String("apiserver", k8sCfg.Host),
zap.Strings("plugins", retinaConfig.EnabledPlugin),
zap.String("data aggregation level", retinaConfig.DataAggregationLevel.String()),
}

_, err := log.SetupZapLogger(logOpts, persistentFields...)
Expand Down
3 changes: 3 additions & 0 deletions cmd/legacy/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ func (d *Daemon) Start() error {
zap.String("version", buildinfo.Version),
zap.String("apiserver", cfg.Host),
zap.String("plugins", strings.Join(daemonConfig.EnabledPlugin, `,`)),
zap.String("data aggregation level", daemonConfig.DataAggregationLevel.String()),
)
if err != nil {
panic(err)
Expand All @@ -120,6 +121,8 @@ func (d *Daemon) Start() error {

metrics.InitializeMetrics()

mainLogger.Info(zap.String("data aggregation level", daemonConfig.DataAggregationLevel.String()))

var tel telemetry.Telemetry
if daemonConfig.EnableTelemetry && buildinfo.ApplicationInsightsID != "" {
mainLogger.Info("telemetry enabled", zap.String("applicationInsightsID", buildinfo.ApplicationInsightsID))
Expand Down
16 changes: 8 additions & 8 deletions controller/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@ ARG OS_VERSION
# pinned base images

# mcr.microsoft.com/oss/go/microsoft/golang:1.22.3-1-cbl-mariner2.0
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/oss/go/microsoft/golang@sha256:8253def0216b87b2994b7ad689aeec7440f6eb67f981e438071d8d67e36ff69f as golang
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/oss/go/microsoft/golang@sha256:8253def0216b87b2994b7ad689aeec7440f6eb67f981e438071d8d67e36ff69f AS golang

# mcr.microsoft.com/cbl-mariner/base/core:2.0
FROM --platform=$TARGETPLATFORM mcr.microsoft.com/cbl-mariner/base/core@sha256:77651116f2e83cf50fddd8a0316945499f8ce6521ff8e94e67539180d1e5975a as mariner-core
FROM --platform=$TARGETPLATFORM mcr.microsoft.com/cbl-mariner/base/core@sha256:77651116f2e83cf50fddd8a0316945499f8ce6521ff8e94e67539180d1e5975a AS mariner-core

# mcr.microsoft.com/cbl-mariner/distroless/minimal:2.0
FROM --platform=$TARGETPLATFORM mcr.microsoft.com/cbl-mariner/distroless/minimal@sha256:63a0a70ceaa1320bc6eb98b81106667d43e46b674731ea8d28e4de1b87e0747f as mariner-distroless
FROM --platform=$TARGETPLATFORM mcr.microsoft.com/cbl-mariner/distroless/minimal@sha256:63a0a70ceaa1320bc6eb98b81106667d43e46b674731ea8d28e4de1b87e0747f AS mariner-distroless

# mcr.microsoft.com/windows/servercore:ltsc2019
FROM --platform=$TARGETPLATFORM mcr.microsoft.com/windows/servercore@sha256:6fdf140282a2f809dae9b13fe441635867f0a27c33a438771673b8da8f3348a4 as ltsc2019
FROM --platform=$TARGETPLATFORM mcr.microsoft.com/windows/servercore@sha256:6fdf140282a2f809dae9b13fe441635867f0a27c33a438771673b8da8f3348a4 AS ltsc2019

# mcr.microsoft.com/windows/servercore:ltsc2022
FROM --platform=$TARGETPLATFORM mcr.microsoft.com/windows/servercore@sha256:45952938708fbde6ec0b5b94de68bcdec3f8c838be018536b1e9e5bd95e6b943 as ltsc2022
FROM --platform=$TARGETPLATFORM mcr.microsoft.com/windows/servercore@sha256:45952938708fbde6ec0b5b94de68bcdec3f8c838be018536b1e9e5bd95e6b943 AS ltsc2022


# build stages
Expand Down Expand Up @@ -103,7 +103,7 @@ RUN echo "Hubble version: $HUBBLE_VERSION" && \
rm hubble-linux-${HUBBLE_ARCH}.tar.gz && rm hubble-linux-${HUBBLE_ARCH}.tar.gz.sha256sum

# init final image
FROM mariner-distroless as init
FROM mariner-distroless AS init
COPY --from=init-bin /go/bin/retina/initretina /retina/initretina
COPY --from=tools /lib/ /lib
COPY --from=tools /usr/lib/ /usr/lib
Expand All @@ -113,7 +113,7 @@ ENTRYPOINT ["./retina/initretina"]
# agent final image
# mcr.microsoft.com/cbl-mariner/distroless/minimal:2.0
# mcr.microsoft.com/cbl-mariner/distroless/minimal@sha256:63a0a70ceaa1320bc6eb98b81106667d43e46b674731ea8d28e4de1b87e0747f
FROM mariner-distroless as agent
FROM mariner-distroless AS agent
COPY --from=tools /lib/ /lib
COPY --from=tools /usr/lib/ /usr/lib
COPY --from=tools /tmp/bin/ /bin
Expand All @@ -128,7 +128,7 @@ ENTRYPOINT ["./retina/controller"]


# agent final image for windows
FROM ${OS_VERSION} as agent-win
FROM ${OS_VERSION} AS agent-win
COPY --from=controller-bin /go/src/github.com/microsoft/retina/windows/kubeconfigtemplate.yaml kubeconfigtemplate.yaml
COPY --from=controller-bin /go/src/github.com/microsoft/retina/windows/setkubeconfigpath.ps1 setkubeconfigpath.ps1
COPY --from=controller-bin /go/bin/retina/controller controller.exe
Expand Down
2 changes: 1 addition & 1 deletion controller/Dockerfile.windows-2019
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM --platform=linux/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.21 as builder
FROM --platform=linux/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.21 AS builder
# Build args
ARG VERSION
ARG APP_INSIGHTS_ID
Expand Down
4 changes: 3 additions & 1 deletion controller/Dockerfile.windows-2022
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
FROM --platform=linux/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.21 as builder
FROM --platform=linux/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.21 AS builder

# Build args
ARG VERSION
ARG APP_INSIGHTS_ID

Expand Down
2 changes: 1 addition & 1 deletion controller/Dockerfile.windows-cgo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM --platform=windows/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.22.2-windowsservercore-ltsc2022 as cgo
FROM --platform=windows/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.22.2-windowsservercore-ltsc2022 AS cgo

SHELL ["powershell", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"]

Expand Down
4 changes: 2 additions & 2 deletions controller/Dockerfile.windows-native
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# buildx targets, and this one requires legacy build.
# Maybe one day: https://github.com/moby/buildkit/issues/616
ARG BUILDER_IMAGE
FROM --platform=windows/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.22-windowsservercore-ltsc2022 as builder
FROM --platform=windows/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.22-windowsservercore-ltsc2022 AS builder
WORKDIR C:\\retina
COPY go.mod .
COPY go.sum .
Expand All @@ -22,7 +22,7 @@ RUN go build -v -o captureworkload.exe -ldflags="-X github.com/microsoft/retina/
FROM --platform=windows/amd64 ${BUILDER_IMAGE} as pktmon-builder
WORKDIR C:\\retina

FROM --platform=windows/amd64 mcr.microsoft.com/windows/nanoserver:ltsc2022 as final
FROM --platform=windows/amd64 mcr.microsoft.com/windows/nanoserver:ltsc2022 AS final
ADD https://github.com/microsoft/etl2pcapng/releases/download/v1.10.0/etl2pcapng.exe /etl2pcapng.exe
SHELL ["powershell", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'Continue';"]
COPY --from=builder C:\\retina\\controller.exe controller.exe
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ data:
remoteContext: {{ .Values.remoteContext }}
enableAnnotations: {{ .Values.enableAnnotations }}
bypassLookupIPOfInterest: {{ .Values.bypassLookupIPOfInterest }}
dataAggregationLevel: {{ .Values.dataAggregationLevel }}
{{- end}}
---
{{- if .Values.os.windows}}
Expand Down
3 changes: 2 additions & 1 deletion deploy/hubble/manifests/controller/helm/retina/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ enablePodLevel: true
remoteContext: false
enableAnnotations: false
bypassLookupIPOfInterest: true
dataAggregationLevel: "high"

imagePullSecrets: []
nameOverride: "retina"
Expand Down Expand Up @@ -178,7 +179,7 @@ hubble:
# Possible values are:
# 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023,
# 2047, 4095, 8191, 16383, 32767, 65535
# eventBufferCapacity: "4095"
eventBufferCapacity: "4095"

# -- Hubble metrics configuration.
# See https://docs.cilium.io/en/stable/observability/metrics/#hubble-metrics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ data:
enablePodLevel: {{ .Values.enablePodLevel }}
remoteContext: {{ .Values.remoteContext }}
enableAnnotations: {{ .Values.enableAnnotations }}
bypassLookupIPOfInterest: {{ .Values.bypassLookupIPOfInterest }}
dataAggregationLevel: {{ .Values.dataAggregationLevel }}
{{- end}}
---
{{- if .Values.os.windows}}
Expand Down
1 change: 1 addition & 0 deletions deploy/legacy/manifests/controller/helm/retina/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ enablePodLevel: false
remoteContext: false
enableAnnotations: false
bypassLookupIPOfInterest: false
dataAggregationLevel: "low"

imagePullSecrets: []
nameOverride: "retina"
Expand Down
7 changes: 7 additions & 0 deletions docs/concepts/dataAggregation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Data Aggregation

Under Retina's hood, data is communicated between plugins and the control plane via [`Flow` objects](https://github.com/cilium/cilium/tree/main/api/v1/flow). Retina's data aggregation settings are designed to manage the amount of data that can potentially be generated by the agent, i.e. the number of `flow` objects being generated. At a higher aggregation level, fewer `flow` objects are produced, which ensures resource efficiency in large clusters. Conversely, a lower level of aggregation results in more `flow` objects being generated, offering more detailed information regarding packets being observed at different points in the Linux kernel. The operational behaviors of Retina at each aggregation level are detailed in the table below:
| Level | Description|
|--- |--- |
| `low` | `packetparser` will attach a bpf program to the node's default interface in the node namespace, which will help capture metrics for `TO_NETWORK` and `FROM_NETWORK` packets. This will give users a more granular view of packet flows and offer more reliable apiserver latency metrics. |
| `high` | `packetparser` will not attach a bpf program to the node's default interface in the node namespace. As a result, packet observation at this location will be disabled, leading to a reduction in metrics being generated. This configuration is recommended when scalability is the primary concern. However, it is important to note that, due to the absence of packet observation at the default interface, the apiserver latency metrics may not be as reliable. |
1 change: 1 addition & 0 deletions docs/installation/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Defaults are specified for each component in *deploy/legacy/manifests/controller
* `enabledPlugin_linux`: Array of enabled plugins for linux.
* `enabledPlugin_win`: Array of enabled plugins for windows.
* `metricsInterval`: the interval for which metrics will be gathered.
* `dataAggregationLevel`: This config defines the level of data aggregation for Retina. See [Data Aggregation](../concepts/dataAggregation.md) for more details.

## Operator Config

Expand Down
2 changes: 1 addition & 1 deletion operator/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ RUN --mount=type=cache,target="/root/.cache/go-build" \
-a -o retina-operator operator/main.go

##################### controller #######################
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/mirror/gcr/distroless/cc-debian11:latest@sha256:b53fbf5f81f4a120a489fedff2092e6fcbeacf7863fce3e45d99cc58dc230ccc as controller
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/mirror/gcr/distroless/cc-debian11:latest@sha256:b53fbf5f81f4a120a489fedff2092e6fcbeacf7863fce3e45d99cc58dc230ccc AS controller

WORKDIR /
COPY --from=builder /workspace/retina-operator .
Expand Down
2 changes: 1 addition & 1 deletion operator/Dockerfile.windows-2019
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM --platform=linux/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.22 as builder
FROM --platform=linux/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.22 AS builder

# Build args
ARG VERSION
Expand Down
2 changes: 1 addition & 1 deletion operator/Dockerfile.windows-2022
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM --platform=linux/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.22 as builder
FROM --platform=linux/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.22 AS builder

# Build args
ARG VERSION
Expand Down
65 changes: 63 additions & 2 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,54 @@ package config

import (
"fmt"
"reflect"
"strings"
"time"

"github.com/mitchellh/mapstructure"
"github.com/spf13/viper"
)

// Level is the data aggregation level read from the agent configuration.
// The zero value is Low.
type Level int

const (
	// Low is the least aggregated level and the fallback for unrecognized
	// configuration values.
	Low Level = iota
	// High is the most aggregated level.
	High
)

// UnmarshalText sets the level from its textual form, ignoring case.
//
// Parsing is intentionally lenient: any text other than "high" (including
// "low", the empty string and unknown values) yields Low, and the returned
// error is always nil.
func (l *Level) UnmarshalText(text []byte) error {
	switch strings.ToLower(string(text)) {
	case "high":
		*l = High
	default:
		// "low" and every unrecognized value fall back to Low.
		*l = Low
	}
	return nil
}

// String returns "low" or "high" for the known levels and the empty string
// for any other value.
//
// It is declared on the value receiver so that both Level and *Level satisfy
// fmt.Stringer; with a pointer receiver, formatting a Level value (for
// example a struct field printed with %+v) would show the raw integer.
func (l Level) String() string {
	switch l {
	case Low:
		return "low"
	case High:
		return "high"
	default:
		return ""
	}
}

// Server is a host/port pair read from the YAML configuration.
type Server struct {
// Host is the value of the "host" key.
Host string `yaml:"host"`
// Port is the value of the "port" key.
Port int `yaml:"port"`
}

type Config struct {
ApiServer Server `yaml:"apiServer"`
APIServer Server `yaml:"apiServer"`
LogLevel string `yaml:"logLevel"`
EnabledPlugin []string `yaml:"enabledPlugin"`
MetricsInterval time.Duration `yaml:"metricsInterval"`
Expand All @@ -25,6 +61,7 @@ type Config struct {
RemoteContext bool `yaml:"remoteContext"`
EnableAnnotations bool `yaml:"enableAnnotations"`
BypassLookupIPOfInterest bool `yaml:"bypassLookupIPOfInterest"`
DataAggregationLevel Level `yaml:"dataAggregationLevel"`
}

func GetConfig(cfgFilename string) (*Config, error) {
Expand All @@ -46,7 +83,14 @@ func GetConfig(cfgFilename string) (*Config, error) {
return nil, fmt.Errorf("fatal error config file: %s", err)
}
var config Config
err = viper.Unmarshal(&config)
decoderConfigOption := func(dc *mapstructure.DecoderConfig) {
dc.DecodeHook = mapstructure.ComposeDecodeHookFunc(
mapstructure.StringToTimeDurationHookFunc(), // default hook.
mapstructure.StringToSliceHookFunc(","), // default hook.
decodeLevelHook,
)
}
err = viper.Unmarshal(&config, decoderConfigOption)
if err != nil {
return nil, fmt.Errorf("fatal error config file: %s", err)
}
Expand All @@ -55,3 +99,20 @@ func GetConfig(cfgFilename string) (*Config, error) {

return &config, nil
}

// decodeLevelHook is a mapstructure decode hook that turns a string
// configuration value into a Level.
//
// field is the type of the source data and target is the type being decoded
// into. When the source is not of string kind, or the target is not Level,
// data is passed through untouched. Otherwise the text is parsed with
// Level.UnmarshalText and the resulting Level value is returned.
func decodeLevelHook(field, target reflect.Type, data interface{}) (interface{}, error) {
	// Only string sources destined for a Level are handled here.
	if field.Kind() != reflect.String || target != reflect.TypeOf(Level(0)) {
		return data, nil
	}
	// Read the text through reflection instead of asserting data.(string):
	// a defined string type (type T string) also reports Kind String, and a
	// bare type assertion on it would panic.
	value := reflect.ValueOf(data)
	if value.Kind() != reflect.String {
		return data, nil
	}
	var level Level
	if err := level.UnmarshalText([]byte(value.String())); err != nil {
		return nil, err
	}
	return level, nil
}
30 changes: 27 additions & 3 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,48 @@
package config

import (
"reflect"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestGetConfig(t *testing.T) {
c, err := GetConfig("./testwith/config.yaml")
if err != nil {
t.Fatalf("Expected no error, instead got %+v", err)
}
if c.ApiServer.Host != "0.0.0.0" ||
c.ApiServer.Port != 10093 ||
if c.APIServer.Host != "0.0.0.0" ||
c.APIServer.Port != 10093 ||
c.LogLevel != "info" ||
c.MetricsInterval != 10*time.Second ||
len(c.EnabledPlugin) != 3 ||
c.EnablePodLevel ||
!c.EnableRetinaEndpoint ||
c.RemoteContext ||
c.EnableAnnotations {
c.EnableAnnotations ||
c.DataAggregationLevel != Low {
t.Fatalf("Expeted config should be same as ./testwith/config.yaml; instead got %+v", c)
}
}

func TestDecodeLevelHook(t *testing.T) {
tests := []struct {
input interface{}
expected interface{}
}{
{"low", Low},
{"high", High},
{"invalid", Low}, // Unimplemented or invalid input should default to Low
{123, 123}, // Non-string input should be returned as is
}

for _, test := range tests {
result, err := decodeLevelHook(reflect.TypeOf(test.input), reflect.TypeOf(Level(0)), test.input)
require.NoError(t, err)
assert.Equal(t, test.expected, result)

}
}
1 change: 1 addition & 0 deletions pkg/config/hubble_config_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ var (
EnablePodLevel: true,
LogLevel: "info",
BypassLookupIPOfInterest: true,
DataAggregationLevel: High,
}

Cell = cell.Module(
Expand Down
1 change: 1 addition & 0 deletions pkg/config/testwith/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ enabledPlugin: ["dropreason", "packetforward", "linuxutil"]
metricsInterval: 10
# used to export telemetry to AppInsights
telemetryEnabled: true
dataAggregationLevel: "low"
4 changes: 2 additions & 2 deletions pkg/managers/controllermanager/controllermanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ func NewControllerManager(conf *kcfg.Config, kubeclient kubernetes.Interface, te

// create HTTP server for API server
httpServer := sm.NewHTTPServer(
conf.ApiServer.Host,
conf.ApiServer.Port,
conf.APIServer.Host,
conf.APIServer.Port,
)

return &Controller{
Expand Down
1 change: 1 addition & 0 deletions pkg/plugin/dropreason/dropreason_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ var (
MetricsInterval: 1 * time.Second,
EnablePodLevel: true,
BypassLookupIPOfInterest: true,
DataAggregationLevel: kcfg.Low,
}
cfgPodLevelDisabled = &kcfg.Config{
MetricsInterval: 1 * time.Second,
Expand Down
Loading

0 comments on commit 37d8401

Please sign in to comment.