Skip to content
This repository has been archived by the owner on Jul 1, 2023. It is now read-only.

Update disk check #224

Merged
merged 2 commits into from
Jun 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions monitoring/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ func GetStorageDriverBootConfigParams(drv string) health.Checker {

// NewStorageChecker creates a new instance of the volume checker
// using the specified checker as configuration
func NewStorageChecker(config StorageConfig) health.Checker {
return noopChecker{}
func NewStorageChecker(config StorageConfig) (health.Checker, error) {
return noopChecker{}, nil
}

// NewDNSChecker sends some default queries to monitor DNS / service discovery health
Expand Down
65 changes: 58 additions & 7 deletions monitoring/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package monitoring
import (
"fmt"

"github.com/gravitational/trace"

humanize "github.com/dustin/go-humanize"
)

Expand All @@ -34,13 +36,48 @@ type StorageConfig struct {
Filesystems []string
// MinFreeBytes define minimum free volume capacity
MinFreeBytes uint64
// HighWatermark is the disk occupancy percentage that is considered degrading
// LowWatermark is the disk occupancy percentage that will trigger a warning probe
LowWatermark uint
// HighWatermark is the disk occupancy percentage that will trigger a critical probe
HighWatermark uint
}

// CheckAndSetDefaults verifies the configuration and fills in unset watermarks.
//
// A missing Path or a watermark above 100 is recorded as a bad-parameter
// error; all recorded errors are passed to trace.NewAggregate and returned
// together. Defaults are applied whether or not validation failed: a zero
// watermark is treated as unset and replaced with the matching
// Default*Watermark value, and a low watermark that ends up above the high
// watermark is lowered to the high watermark.
func (c *StorageConfig) CheckAndSetDefaults() error {
	const maxPercent = 100

	var problems []error
	if len(c.Path) == 0 {
		problems = append(problems, trace.BadParameter("volume path must be provided"))
	}
	if c.LowWatermark > maxPercent {
		problems = append(problems, trace.BadParameter("low watermark must be 0-100"))
	}
	if c.HighWatermark > maxPercent {
		problems = append(problems, trace.BadParameter("high watermark must be 0-100"))
	}

	// A zero watermark means "not configured"; substitute the default.
	if c.HighWatermark == 0 {
		c.HighWatermark = DefaultHighWatermark
	}
	if c.LowWatermark == 0 {
		c.LowWatermark = DefaultLowWatermark
	}

	// The low watermark is never allowed to sit above the high watermark.
	if c.HighWatermark < c.LowWatermark {
		c.LowWatermark = c.HighWatermark
	}

	return trace.NewAggregate(problems...)
}

// HighWatermarkCheckerData is attached to high watermark check results
type HighWatermarkCheckerData struct {
// HighWatermark is the watermark percentage value
// LowWatermark is the low watermark percentage value
LowWatermark uint `json:"low_watermark"`
// HighWatermark is the high watermark percentage value
HighWatermark uint `json:"high_watermark"`
// Path is the absolute path to check
Path string `json:"path"`
Expand All @@ -50,17 +87,31 @@ type HighWatermarkCheckerData struct {
AvailableBytes uint64 `json:"available_bytes"`
}

// FailureMessage returns failure watermark check message
func (d HighWatermarkCheckerData) FailureMessage() string {
return fmt.Sprintf("disk utilization on %s exceeds %v percent (%s is available out of %s), see https://gravitational.com/telekube/docs/cluster/#garbage-collection",
d.Path, d.HighWatermark, humanize.Bytes(d.AvailableBytes), humanize.Bytes(d.TotalBytes))
// WarningMessage returns the warning watermark check message.
//
// The message reports that utilization on Path exceeds LowWatermark, the
// current utilization percentage (two decimal places), the available and
// total capacity in human-readable form, and the HighWatermark at which the
// cluster will degrade.
func (d HighWatermarkCheckerData) WarningMessage() string {
	// Only compute the percentage when it is well defined: a zero total would
	// divide by zero (NaN), and available > total would wrap the unsigned
	// subtraction into a huge bogus value. Report 0 in those cases.
	var diskUsage float64
	if d.TotalBytes > 0 && d.AvailableBytes <= d.TotalBytes {
		diskUsage = float64(d.TotalBytes-d.AvailableBytes) / float64(d.TotalBytes) * 100
	}
	// %.2f keeps the percentage readable; %v would print the full float64
	// representation (e.g. 83.33333333333334).
	return fmt.Sprintf("disk utilization on %s exceeds %v%%, currently at %.2f%% (%s is available out of %s), cluster will degrade if usage exceeds %v%%, see https://gravitational.com/gravity/docs/cluster/#garbage-collection",
		d.Path, d.LowWatermark, diskUsage, humanize.Bytes(d.AvailableBytes), humanize.Bytes(d.TotalBytes), d.HighWatermark)
}

// CriticalMessage returns the critical watermark check message.
//
// The message reports that utilization on Path exceeds HighWatermark, the
// current utilization percentage (two decimal places), and the available and
// total capacity in human-readable form.
func (d HighWatermarkCheckerData) CriticalMessage() string {
	// Only compute the percentage when it is well defined: a zero total would
	// divide by zero (NaN), and available > total would wrap the unsigned
	// subtraction into a huge bogus value. Report 0 in those cases.
	var diskUsage float64
	if d.TotalBytes > 0 && d.AvailableBytes <= d.TotalBytes {
		diskUsage = float64(d.TotalBytes-d.AvailableBytes) / float64(d.TotalBytes) * 100
	}
	// %.2f keeps the percentage readable; %v would print the full float64
	// representation (e.g. 93.33333333333333).
	return fmt.Sprintf("disk utilization on %s exceeds %v%%, currently at %.2f%% (%s is available out of %s), see https://gravitational.com/gravity/docs/cluster/#garbage-collection",
		d.Path, d.HighWatermark, diskUsage, humanize.Bytes(d.AvailableBytes), humanize.Bytes(d.TotalBytes))
}

// SuccessMessage returns success watermark check message
func (d HighWatermarkCheckerData) SuccessMessage() string {
return fmt.Sprintf("disk utilization on %s is below %v percent (%s is available out of %s)",
return fmt.Sprintf("disk utilization on %s is below %v%% (%s is available out of %s)",
d.Path, d.HighWatermark, humanize.Bytes(d.AvailableBytes), humanize.Bytes(d.TotalBytes))
}

// DiskSpaceCheckerID names the checker that reports disk space utilization.
const DiskSpaceCheckerID = "disk-space"

// Default watermark percentages, substituted when a watermark is left at zero.
const (
	// DefaultLowWatermark is the default low watermark percentage.
	DefaultLowWatermark = 80
	// DefaultHighWatermark is the default high watermark percentage.
	DefaultHighWatermark = 90
)
42 changes: 33 additions & 9 deletions monitoring/storage_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,15 @@ import (

// NewStorageChecker creates a new instance of the volume checker
// using the specified checker as configuration
func NewStorageChecker(config StorageConfig) health.Checker {
func NewStorageChecker(config StorageConfig) (health.Checker, error) {
if err := config.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}

return &storageChecker{
StorageConfig: config,
osInterface: &realOS{},
}
}, nil
}

// storageChecker verifies volume requirements
Expand Down Expand Up @@ -84,7 +88,7 @@ func (c *storageChecker) check(ctx context.Context, reporter health.Reporter) er

return trace.NewAggregate(c.checkFsType(ctx, reporter),
c.checkCapacity(ctx, reporter),
c.checkHighWatermark(ctx, reporter),
c.checkDiskUsage(ctx, reporter),
c.checkWriteSpeed(ctx, reporter))
}

Expand Down Expand Up @@ -143,7 +147,9 @@ func (c *storageChecker) checkFsType(ctx context.Context, reporter health.Report
return nil
}

func (c *storageChecker) checkHighWatermark(ctx context.Context, reporter health.Reporter) error {
// checkDiskUsage checks the disk usage. A warning or critical probe will be
// reported if the usage percentage is above the set thresholds.
func (c *storageChecker) checkDiskUsage(ctx context.Context, reporter health.Reporter) error {
if c.HighWatermark == 0 {
return nil
}
Expand All @@ -155,6 +161,7 @@ func (c *storageChecker) checkHighWatermark(ctx context.Context, reporter health
return trace.BadParameter("disk capacity at %v is 0", c.path)
}
checkerData := HighWatermarkCheckerData{
LowWatermark: c.LowWatermark,
HighWatermark: c.HighWatermark,
Path: c.Path,
TotalBytes: totalBytes,
Expand All @@ -164,21 +171,38 @@ func (c *storageChecker) checkHighWatermark(ctx context.Context, reporter health
if err != nil {
return trace.Wrap(err)
}
if float64(totalBytes-availableBytes)/float64(totalBytes)*100 > float64(c.HighWatermark) {

diskUsagePercent := float64(totalBytes-availableBytes) / float64(totalBytes) * 100

if diskUsagePercent > float64(checkerData.HighWatermark) {
reporter.Add(&pb.Probe{
Checker: DiskSpaceCheckerID,
Detail: checkerData.FailureMessage(),
Detail: checkerData.CriticalMessage(),
CheckerData: checkerDataBytes,
Status: pb.Probe_Failed,
Severity: pb.Probe_Critical,
})
} else {
return nil
}

if diskUsagePercent > float64(checkerData.LowWatermark) {
reporter.Add(&pb.Probe{
Checker: DiskSpaceCheckerID,
Detail: checkerData.SuccessMessage(),
Detail: checkerData.WarningMessage(),
CheckerData: checkerDataBytes,
Status: pb.Probe_Running,
Status: pb.Probe_Failed,
Severity: pb.Probe_Warning,
})
return nil
}

reporter.Add(&pb.Probe{
Checker: DiskSpaceCheckerID,
Detail: checkerData.SuccessMessage(),
CheckerData: checkerDataBytes,
Status: pb.Probe_Running,
})

return nil
}

Expand Down
18 changes: 15 additions & 3 deletions monitoring/storage_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,19 +105,31 @@ func (_ *StorageSuite) TestStorage(c *C) {
StorageConfig: StorageConfig{
Path: path.Join("/tmp", fmt.Sprintf("%d", time.Now().Unix())),
WillBeCreated: true,
HighWatermark: 40,
LowWatermark: 60,
HighWatermark: 80,
},
osInterface: testOS{mountList: mounts, bytesAvail: 2048},
}.probe(c, "high watermark is reached", shallFail)
}.probe(c, "low watermark is not reached", shallSucceed)

storageChecker{
StorageConfig: StorageConfig{
Path: path.Join("/tmp", fmt.Sprintf("%d", time.Now().Unix())),
WillBeCreated: true,
LowWatermark: 40,
HighWatermark: 60,
},
osInterface: testOS{mountList: mounts, bytesAvail: 2048},
}.probe(c, "high watermark is not reached", shallSucceed)
}.probe(c, "low watermark is reached", shallFail)

storageChecker{
StorageConfig: StorageConfig{
Path: path.Join("/tmp", fmt.Sprintf("%d", time.Now().Unix())),
WillBeCreated: true,
LowWatermark: 20,
HighWatermark: 40,
},
osInterface: testOS{mountList: mounts, bytesAvail: 2048},
}.probe(c, "high watermark is reached", shallFail)
}

func (_ *StorageSuite) TestMatchesFilesystem(c *C) {
Expand Down