Skip to content

Commit

Permalink
Define gRPC server for liveness probe
Browse files Browse the repository at this point in the history
Currently, we use the tetra status command to report the status of the
tetragon agent. This comes with some overhead, as the tetra binary has a lot
of additional functionality, and it seems like overkill to use it for
status reporting.

On the other hand, k8s supports liveness probes that use a gRPC
endpoint (see
https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-grpc-liveness-probe).
This patch first creates a dedicated gRPC server to report agent status that
can be used for the liveness probe.

Signed-off-by: Anastasios Papagiannis <[email protected]>
  • Loading branch information
tpapagian committed May 29, 2024
1 parent 4c47837 commit 132dd8a
Show file tree
Hide file tree
Showing 10 changed files with 917 additions and 0 deletions.
50 changes: 50 additions & 0 deletions cmd/tetragon/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/cilium/tetragon/pkg/fileutils"
"github.com/cilium/tetragon/pkg/filters"
tetragonGrpc "github.com/cilium/tetragon/pkg/grpc"
"github.com/cilium/tetragon/pkg/health"
"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics"
"github.com/cilium/tetragon/pkg/metrics/metricsconfig"
Expand Down Expand Up @@ -69,6 +70,8 @@ import (
"github.com/spf13/cobra/doc"
"github.com/spf13/viper"
"google.golang.org/grpc"
gh "google.golang.org/grpc/health"
"google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/protobuf/types/known/durationpb"
v1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
Expand Down Expand Up @@ -440,6 +443,53 @@ func tetragonExecute() error {
}
}

if option.Config.HealthServerAddress != "" {
// Create a new health server and mark it as serving.
healthServer := gh.NewServer()
healthServer.SetServingStatus("liveness", grpc_health_v1.HealthCheckResponse_SERVING)

// Create a new gRPC server for health checks and register the healthServer.
grpcHealthServer := grpc.NewServer()
grpc_health_v1.RegisterHealthServer(grpcHealthServer, healthServer)

// Start the gRPC server for the health checks.
go func() {
// The gRPC server for the health checks listens on option.Config.HealthServerAddress (":6789" by default).
listener, err := net.Listen("tcp", option.Config.HealthServerAddress)
if err != nil {
log.WithError(err).Fatal("Failed to listen for gRPC healthserver")
}

log.WithField("address", option.Config.HealthServerAddress).WithField("interval", option.Config.HealthServerInterval).Info("Starting gRPC health server")
if err = grpcHealthServer.Serve(listener); err != nil {
log.WithError(err).Fatal("Failed to start gRPC healthserver")
}
}()

// Every option.Config.HealthServerInterval seconds (10 by default) check the agent health. To check if our agent is healthy we call
// health.GetHealth() and we report the status to the healthServer.
go func() {
ticker := time.NewTicker(time.Duration(option.Config.HealthServerInterval) * time.Second)
for {
select {
case <-ticker.C:
servingStatus := grpc_health_v1.HealthCheckResponse_NOT_SERVING
if response, err := health.GetHealth(); err == nil {
if st := response.GetHealthStatus(); len(st) > 0 && st[0].Status == tetragon.HealthStatusResult_HEALTH_STATUS_RUNNING {
servingStatus = grpc_health_v1.HealthCheckResponse_SERVING
}
}
healthServer.SetServingStatus("liveness", servingStatus)
case <-ctx.Done():
ticker.Stop()
healthServer.Shutdown() // set all services to NOT_SERVING
grpcHealthServer.Stop()
return
}
}
}()
}

log.WithField("enabled", option.Config.ExportFilename != "").WithField("fileName", option.Config.ExportFilename).Info("Exporter configuration")
obs.AddListener(pm)
saveInitInfo()
Expand Down
6 changes: 6 additions & 0 deletions docs/data/tetragon_flags.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pkg/option/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ type config struct {
CgroupRate CgroupRate

UsernameMetadata int

HealthServerAddress string
HealthServerInterval int
}

var (
Expand Down
8 changes: 8 additions & 0 deletions pkg/option/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ const (
KeyCgroupRate = "cgroup-rate"

KeyUsernameMetadata = "username-metadata"

KeyHealthServerAddress = "health-server-address"
KeyHealthTimeInterval = "health-server-interval"
)

type UsernameMetadaCode int
Expand Down Expand Up @@ -211,6 +214,8 @@ func ReadAndSetFlags() error {
}

Config.CgroupRate = ParseCgroupRate(viper.GetString(KeyCgroupRate))
Config.HealthServerAddress = viper.GetString(KeyHealthServerAddress)
Config.HealthServerInterval = viper.GetInt(KeyHealthTimeInterval)
return nil
}

Expand Down Expand Up @@ -362,4 +367,7 @@ func AddFlags(flags *pflag.FlagSet) {
flags.String(KeyUsernameMetadata, "disabled", "Resolve UIDs to user names for processes running in host namespace")

flags.String(KeyCgroupRate, "", "Base sensor events cgroup rate <events,interval> disabled by default ('1000/1s' means rate 1000 events per second")

flags.String(KeyHealthServerAddress, ":6789", "Health server address (e.g. ':6789')(use '' to disabled it)")
flags.Int(KeyHealthTimeInterval, 10, "Health server interval in seconds")
}
117 changes: 117 additions & 0 deletions vendor/google.golang.org/grpc/health/client.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 132dd8a

Please sign in to comment.