Skip to content

Commit

Permalink
use a hybrid health check for wlm kubeapiserver collector
Browse files Browse the repository at this point in the history
  • Loading branch information
adel121 committed Dec 9, 2024
1 parent 2491564 commit 6dac18c
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 25 deletions.
3 changes: 2 additions & 1 deletion comp/core/healthprobe/impl/healthprobe_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ import (
"net/http/httptest"
"testing"

"github.com/stretchr/testify/assert"

healthprobeComponent "github.com/DataDog/datadog-agent/comp/core/healthprobe/def"
logmock "github.com/DataDog/datadog-agent/comp/core/log/mock"
compdef "github.com/DataDog/datadog-agent/comp/def"
"github.com/DataDog/datadog-agent/pkg/status/health"
"github.com/stretchr/testify/assert"
)

func TestServer(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ func runStartupCheck(ctx context.Context, stores []*reflectorStore) {
// There is no way to ensure liveness correctly as it would need to be plugged inside the
// inner loop of Reflector.
// However, we add Startup when we got at least some data.
startupHealthCheck := health.RegisterStartup(componentName)
startupHealthCheck := health.RegisterReadiness(componentName, health.Once)

// Checked synced, in its own scope to cleanly un-reference the syncTimer
{
Expand Down
16 changes: 9 additions & 7 deletions pkg/status/health/global.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,23 @@ import (

var readinessAndLivenessCatalog = newCatalog()
var readinessOnlyCatalog = newCatalog()
var startupOnlyCatalog = newStartupCatalog()
var startupOnlyCatalog = newCatalog()

// RegisterReadiness registers a component for readiness check with the default 30 seconds timeout, returns a token
func RegisterReadiness(name string) *Handle {
return readinessOnlyCatalog.register(name)
func RegisterReadiness(name string, options ...Option) *Handle {
return readinessOnlyCatalog.register(name, options...)
}

// RegisterLiveness registers a component for liveness check with the default 30 seconds timeout, returns a token
func RegisterLiveness(name string) *Handle {
return readinessAndLivenessCatalog.register(name)
func RegisterLiveness(name string, options ...Option) *Handle {
return readinessAndLivenessCatalog.register(name, options...)
}

// RegisterStartup registers a component for startup check, returns a token
func RegisterStartup(name string) *Handle {
return startupOnlyCatalog.register(name)
func RegisterStartup(name string, options ...Option) *Handle {
// Startup health checks are always registered with the Once option because, by design, they stop being
// checked after they have been marked healthy for the first time
return startupOnlyCatalog.register(name, append(options, Once)...)
}

// Deregister a component from the healthcheck
Expand Down
23 changes: 10 additions & 13 deletions pkg/status/health/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,33 +29,25 @@ type component struct {
name string
healthChan chan time.Time
healthy bool
// if set to true, once the check is healthy, we mark it as healthy forever and we stop checking it
once bool
}

type catalog struct {
sync.RWMutex
components map[*Handle]*component
latestRun time.Time
startup bool
}

func newCatalog() *catalog {
return &catalog{
components: make(map[*Handle]*component),
latestRun: time.Now(), // Start healthy
startup: false,
}
}

func newStartupCatalog() *catalog {
return &catalog{
components: make(map[*Handle]*component),
latestRun: time.Now(), // Start healthy
startup: true,
}
}

// register a component with the default 30 seconds timeout, returns a token
func (c *catalog) register(name string) *Handle {
func (c *catalog) register(name string, options ...Option) *Handle {
c.Lock()
defer c.Unlock()

Expand All @@ -68,6 +60,11 @@ func (c *catalog) register(name string) *Handle {
healthChan: make(chan time.Time, bufferSize),
healthy: false,
}

for _, option := range options {
option(component)
}

h := &Handle{
C: component.healthChan,
}
Expand Down Expand Up @@ -107,8 +104,8 @@ func (c *catalog) pingComponents(healthDeadline time.Time) bool {
c.Lock()
defer c.Unlock()
for _, component := range c.components {
// In startup mode, we skip already healthy components.
if c.startup && component.healthy {
// Skip components registered with the Once option after they have been marked healthy
if component.healthy && component.once {
continue
}
select {
Expand Down
6 changes: 3 additions & 3 deletions pkg/status/health/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ func TestGetHealthy(t *testing.T) {
assert.Len(t, status.Unhealthy, 0)
}

func TestStartupCatalog(t *testing.T) {
cat := newStartupCatalog()
token := cat.register("test1")
func TestCatalogWithOnceComponent(t *testing.T) {
cat := newCatalog()
token := cat.register("test1", Once)

// Start unhealthy
status := cat.getStatus()
Expand Down
14 changes: 14 additions & 0 deletions pkg/status/health/options.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

// Package health implements the internal healthcheck
package health

// Option is a function that configures a component at registration time.
// Options are passed to the Register* functions and applied to the newly
// created component before its handle is returned.
type Option func(*component)

// Once is an Option that sets the component's once flag: after the component
// has been marked healthy for the first time, it is skipped by subsequent
// health pings instead of being checked again.
func Once(c *component) {
c.once = true
}

0 comments on commit 6dac18c

Please sign in to comment.