Skip to content
This repository has been archived by the owner on Jul 1, 2023. It is now read-only.

Commit

Permalink
Add a warning watermark to the disk check
Browse files Browse the repository at this point in the history
- Set default critical watermark to 90%
  • Loading branch information
bernardjkim committed Jun 5, 2020
1 parent cffc94e commit 9509c0a
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 17 deletions.
38 changes: 32 additions & 6 deletions monitoring/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package monitoring
import (
"fmt"

"github.com/gravitational/trace"

humanize "github.com/dustin/go-humanize"
)

Expand All @@ -38,10 +40,25 @@ type StorageConfig struct {
HighWatermark uint
}

// CheckAndSetDefaults validates that this configuration is correct and sets
// value defaults where necessary.
//
// Validation fails when Path is empty or when HighWatermark is greater than
// 100. A zero HighWatermark is treated as unset and is replaced with
// DefaultCriticalWatermark. Every validation failure is collected and the
// whole set is returned as one aggregate error.
func (c *StorageConfig) CheckAndSetDefaults() error {
	var errors []error
	if c.Path == "" {
		errors = append(errors, trace.BadParameter("volume path must be provided"))
	}
	// The watermark is compared against a disk usage percentage, so a value
	// above 100 could never be exceeded and would silently disable the check.
	if c.HighWatermark > 100 {
		errors = append(errors, trace.BadParameter("high watermark must be 0-100"))
	}
	if c.HighWatermark == 0 {
		c.HighWatermark = DefaultCriticalWatermark
	}
	return trace.NewAggregate(errors...)
}

// HighWatermarkCheckerData is attached to high watermark check results
type HighWatermarkCheckerData struct {
// HighWatermark is the watermark percentage value
HighWatermark uint `json:"high_watermark"`
// WatermarkWarning is the watermark warning percentage value
WatermarkWarning uint `json:"watermark_warning"`
// WatermarkCritical is the watermark critical percentage value
WatermarkCritical uint `json:"watermark_critical"`
// Path is the absolute path to check
Path string `json:"path"`
// TotalBytes is the total disk capacity
Expand All @@ -50,17 +67,26 @@ type HighWatermarkCheckerData struct {
AvailableBytes uint64 `json:"available_bytes"`
}

// FailureMessage returns failure watermark check message
func (d HighWatermarkCheckerData) FailureMessage() string {
// WarningMessage returns the warning watermark check message.
// The message names the checked path and the warning watermark percentage,
// reports the available and total capacity formatted with humanize.Bytes,
// and links to the garbage collection documentation.
func (d HighWatermarkCheckerData) WarningMessage() string {
return fmt.Sprintf("disk utilization on %s exceeds %v percent (%s is available out of %s), see https://gravitational.com/telekube/docs/cluster/#garbage-collection",
d.Path, d.HighWatermark, humanize.Bytes(d.AvailableBytes), humanize.Bytes(d.TotalBytes))
d.Path, d.WatermarkWarning, humanize.Bytes(d.AvailableBytes), humanize.Bytes(d.TotalBytes))
}

// CriticalMessage builds the message reported when disk utilization on the
// checked path is above the critical watermark. It includes the watermark
// percentage and the available and total capacity in human-readable form.
func (d HighWatermarkCheckerData) CriticalMessage() string {
	const format = "disk utilization on %s exceeds %v percent (%s is available out of %s), see https://gravitational.com/telekube/docs/cluster/#garbage-collection"

	available := humanize.Bytes(d.AvailableBytes)
	total := humanize.Bytes(d.TotalBytes)
	return fmt.Sprintf(format, d.Path, d.WatermarkCritical, available, total)
}

// SuccessMessage returns the success watermark check message.
// The message states that disk utilization on the checked path is below the
// watermark percentage and reports the available and total capacity
// formatted with humanize.Bytes.
func (d HighWatermarkCheckerData) SuccessMessage() string {
return fmt.Sprintf("disk utilization on %s is below %v percent (%s is available out of %s)",
d.Path, d.HighWatermark, humanize.Bytes(d.AvailableBytes), humanize.Bytes(d.TotalBytes))
d.Path, d.WatermarkWarning, humanize.Bytes(d.AvailableBytes), humanize.Bytes(d.TotalBytes))
}

// Identifier and default threshold used by the disk space check.
const (
	// DiskSpaceCheckerID names the checker that reports disk space utilization.
	DiskSpaceCheckerID = "disk-space"

	// DefaultCriticalWatermark is the critical disk usage threshold, in
	// percent, used when no watermark is configured.
	DefaultCriticalWatermark = 90
)
44 changes: 33 additions & 11 deletions monitoring/storage_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,15 @@ import (

// NewStorageChecker creates a new instance of the volume checker
// using the specified config as configuration.
// The config is first validated and defaulted with CheckAndSetDefaults;
// if that fails, the wrapped error is returned and no checker is created.
func NewStorageChecker(config StorageConfig) health.Checker {
func NewStorageChecker(config StorageConfig) (health.Checker, error) {
if err := config.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}

return &storageChecker{
StorageConfig: config,
osInterface: &realOS{},
}
}, nil
}

// storageChecker verifies volume requirements
Expand Down Expand Up @@ -155,30 +159,48 @@ func (c *storageChecker) checkHighWatermark(ctx context.Context, reporter health
return trace.BadParameter("disk capacity at %v is 0", c.path)
}
checkerData := HighWatermarkCheckerData{
HighWatermark: c.HighWatermark,
Path: c.Path,
TotalBytes: totalBytes,
AvailableBytes: availableBytes,
WatermarkCritical: c.HighWatermark,
WatermarkWarning: c.HighWatermark - 10, // Set warning watermark 10% below the critical watermark
Path: c.Path,
TotalBytes: totalBytes,
AvailableBytes: availableBytes,
}
checkerDataBytes, err := json.Marshal(checkerData)
if err != nil {
return trace.Wrap(err)
}
if float64(totalBytes-availableBytes)/float64(totalBytes)*100 > float64(c.HighWatermark) {

diskUsagePercent := float64(totalBytes-availableBytes) / float64(totalBytes) * 100

if diskUsagePercent > float64(checkerData.WatermarkCritical) {
reporter.Add(&pb.Probe{
Checker: DiskSpaceCheckerID,
Detail: checkerData.FailureMessage(),
Detail: checkerData.CriticalMessage(),
CheckerData: checkerDataBytes,
Status: pb.Probe_Failed,
Severity: pb.Probe_Critical,
})
} else {
return nil
}

if diskUsagePercent > float64(checkerData.WatermarkWarning) {
reporter.Add(&pb.Probe{
Checker: DiskSpaceCheckerID,
Detail: checkerData.SuccessMessage(),
Detail: checkerData.WarningMessage(),
CheckerData: checkerDataBytes,
Status: pb.Probe_Running,
Status: pb.Probe_Failed,
Severity: pb.Probe_Warning,
})
return nil
}

reporter.Add(&pb.Probe{
Checker: DiskSpaceCheckerID,
Detail: checkerData.SuccessMessage(),
CheckerData: checkerDataBytes,
Status: pb.Probe_Running,
})

return nil
}

Expand Down

0 comments on commit 9509c0a

Please sign in to comment.