diff --git a/client/client.go b/client/client.go index 41267264cf4..620b853daa2 100644 --- a/client/client.go +++ b/client/client.go @@ -225,10 +225,18 @@ func NewClient(cfg *config.Config, consulSyncer *consul.Syncer, logger *log.Logg return nil, fmt.Errorf("failed to initialize client: %v", err) } - // Add the stats collector and the garbage collector + // Add the stats collector statsCollector := stats.NewHostStatsCollector(logger, c.config.AllocDir) c.hostStatsCollector = statsCollector - c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, cfg.Node.Reserved.DiskMB) + + // Add the garbage collector + gcConfig := &GCConfig{ + DiskUsageThreshold: cfg.GCDiskUsageThreshold, + InodeUsageThreshold: cfg.GCInodeUsageThreshold, + Interval: cfg.GCInterval, + ReservedDiskMB: cfg.Node.Reserved.DiskMB, + } + c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, gcConfig) // Setup the node if err := c.setupNode(); err != nil { diff --git a/client/config/config.go b/client/config/config.go index 865c7ac9daf..3680fd709cd 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -151,6 +151,18 @@ type Config struct { // TLSConfig holds various TLS related configurations TLSConfig *config.TLSConfig + // GCInterval is the time interval at which the client triggers garbage + // collection + GCInterval time.Duration + + // GCDiskUsageThreshold is the disk usage threshold beyond which the Nomad + // client triggers GC of terminal allocations + GCDiskUsageThreshold float64 + + // GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad + // client triggers GC of the terminal allocations + GCInodeUsageThreshold float64 + // LogLevel is the level of the logs to putout LogLevel string } @@ -177,6 +189,9 @@ func DefaultConfig() *Config { StatsCollectionInterval: 1 * time.Second, TLSConfig: &config.TLSConfig{}, LogLevel: "DEBUG", + GCInterval: 1 * time.Minute, + GCDiskUsageThreshold: 80, + GCInodeUsageThreshold: 70, } } diff 
--git a/client/gc.go b/client/gc.go index 99d992814f7..5861dee0659 100644 --- a/client/gc.go +++ b/client/gc.go @@ -12,18 +12,6 @@ import ( ) const ( - // diskUsageThreshold is the percent of used disk space beyond which Nomad - // GCs terminated allocations - diskUsageThreshold = 80 - - // gcInterval is the interval at which Nomad runs the garbage collector - gcInterval = 1 * time.Minute - - // inodeUsageThreshold is the percent of inode usage that Nomad tries to - // maintain, whenever we are over it we will attempt to GC terminal - // allocations - inodeUsageThreshold = 70 - // MB is a constant which converts values in bytes to MB MB = 1024 * 1024 ) @@ -134,22 +122,30 @@ func (i *IndexedGCAllocPQ) Length() int { return len(i.heap) } +// GCConfig allows changing the behaviour of the garbage collector +type GCConfig struct { + DiskUsageThreshold float64 + InodeUsageThreshold float64 + Interval time.Duration + ReservedDiskMB int +} + // AllocGarbageCollector garbage collects terminated allocations on a node type AllocGarbageCollector struct { allocRunners *IndexedGCAllocPQ statsCollector stats.NodeStatsCollector - reservedDiskMB int + config *GCConfig logger *log.Logger shutdownCh chan struct{} } // NewAllocGarbageCollector returns a garbage collector for terminated // allocations on a node. 
-func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, reservedDiskMB int) *AllocGarbageCollector { +func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, config *GCConfig) *AllocGarbageCollector { gc := &AllocGarbageCollector{ allocRunners: NewIndexedGCAllocPQ(), statsCollector: statsCollector, - reservedDiskMB: reservedDiskMB, + config: config, logger: logger, shutdownCh: make(chan struct{}), } @@ -159,7 +155,7 @@ func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStats } func (a *AllocGarbageCollector) run() { - ticker := time.NewTicker(gcInterval) + ticker := time.NewTicker(a.config.Interval) for { select { case <-ticker.C: @@ -195,8 +191,8 @@ func (a *AllocGarbageCollector) keepUsageBelowThreshold() error { break } - if diskStats.UsedPercent <= diskUsageThreshold && - diskStats.InodesUsedPercent <= inodeUsageThreshold { + if diskStats.UsedPercent <= a.config.DiskUsageThreshold && + diskStats.InodesUsedPercent <= a.config.InodeUsageThreshold { break } @@ -266,10 +262,10 @@ func (a *AllocGarbageCollector) MakeRoomFor(allocations []*structs.Allocation) e // we don't need to garbage collect terminated allocations if hostStats := a.statsCollector.Stats(); hostStats != nil { var availableForAllocations uint64 - if hostStats.AllocDirStats.Available < uint64(a.reservedDiskMB*MB) { + if hostStats.AllocDirStats.Available < uint64(a.config.ReservedDiskMB*MB) { availableForAllocations = 0 } else { - availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.reservedDiskMB*MB) + availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.config.ReservedDiskMB*MB) } if uint64(totalResource.DiskMB*MB) < availableForAllocations { return nil diff --git a/command/agent/agent.go b/command/agent/agent.go index dc835028d73..05aaa4734d9 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -288,6 +288,11 @@ func (a *Agent) clientConfig() 
(*clientconfig.Config, error) { conf.TLSConfig = a.config.TLSConfig conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP + // Set the GC related configs + conf.GCInterval = a.config.Client.GCInterval + conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold + conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold + return conf, nil } diff --git a/command/agent/config-test-fixtures/basic.hcl b/command/agent/config-test-fixtures/basic.hcl index 587d6842386..1c5e2001dde 100644 --- a/command/agent/config-test-fixtures/basic.hcl +++ b/command/agent/config-test-fixtures/basic.hcl @@ -53,6 +53,9 @@ client { data_points = 35 collection_interval = "5s" } + gc_interval = "6s" + gc_disk_usage_threshold = 82 + gc_inode_usage_threshold = 91 } server { enabled = true diff --git a/command/agent/config.go b/command/agent/config.go index d2dddecaad1..a57237de53f 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -197,6 +197,18 @@ type ClientConfig struct { // be used to target a certain utilization or to prevent Nomad from using a // particular set of ports. 
Reserved *Resources `mapstructure:"reserved"` + + // GCInterval is the time interval at which the client triggers garbage + // collection + GCInterval time.Duration `mapstructure:"gc_interval"` + + // GCDiskUsageThreshold is the disk usage threshold beyond which the Nomad + // client triggers GC of terminal allocations + GCDiskUsageThreshold float64 `mapstructure:"gc_disk_usage_threshold"` + + // GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad + // client triggers GC of the terminal allocations + GCInodeUsageThreshold float64 `mapstructure:"gc_inode_usage_threshold"` } // ServerConfig is configuration specific to the server mode @@ -465,6 +477,9 @@ func DevConfig() *Config { conf.Client.Options = map[string]string{ "driver.docker.volumes": "true", } + conf.Client.GCInterval = 10 * time.Minute + conf.Client.GCDiskUsageThreshold = 99 + conf.Client.GCInodeUsageThreshold = 99 return conf } @@ -487,11 +502,14 @@ func DefaultConfig() *Config { Consul: config.DefaultConsulConfig(), Vault: config.DefaultVaultConfig(), Client: &ClientConfig{ - Enabled: false, - MaxKillTimeout: "30s", - ClientMinPort: 14000, - ClientMaxPort: 14512, - Reserved: &Resources{}, + Enabled: false, + MaxKillTimeout: "30s", + ClientMinPort: 14000, + ClientMaxPort: 14512, + Reserved: &Resources{}, + GCInterval: 1 * time.Minute, + GCInodeUsageThreshold: 70, + GCDiskUsageThreshold: 80, }, Server: &ServerConfig{ Enabled: false, diff --git a/command/agent/config_parse.go b/command/agent/config_parse.go index 9dfbf6aaf8e..a9320e5397c 100644 --- a/command/agent/config_parse.go +++ b/command/agent/config_parse.go @@ -341,6 +341,9 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error { "client_min_port", "reserved", "stats", + "gc_interval", + "gc_disk_usage_threshold", + "gc_inode_usage_threshold", } if err := checkHCLKeys(listVal, valid); err != nil { return err @@ -358,7 +361,15 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error { 
delete(m, "stats") var config ClientConfig - if err := mapstructure.WeakDecode(m, &config); err != nil { + dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ + DecodeHook: mapstructure.StringToTimeDurationHookFunc(), + WeaklyTypedInput: true, + Result: &config, + }) + if err != nil { + return err + } + if err := dec.Decode(m); err != nil { return err } diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index 17268d6b393..29cf3aef853 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -70,6 +70,9 @@ func TestConfig_Parse(t *testing.T) { ReservedPorts: "1,100,10-12", ParsedReservedPorts: []int{1, 10, 11, 12, 100}, }, + GCInterval: 6 * time.Second, + GCDiskUsageThreshold: 82, + GCInodeUsageThreshold: 91, }, Server: &ServerConfig{ Enabled: true, diff --git a/scripts/build.sh b/scripts/build.sh index 1273db8d85a..f1aa7fe48ef 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -84,3 +84,4 @@ done echo echo "==> Results:" tree pkg/ + diff --git a/website/source/docs/agent/configuration/client.html.md b/website/source/docs/agent/configuration/client.html.md index e06b05aab34..019dea7572a 100644 --- a/website/source/docs/agent/configuration/client.html.md +++ b/website/source/docs/agent/configuration/client.html.md @@ -83,6 +83,15 @@ client { [data_dir](/docs/agent/configuration/index.html#data_dir) suffixed with "client", like `"/opt/nomad/client"`. This must be an absolute path. +- `gc_interval` `(string: "1m")` - Specifies the interval at which Nomad + attempts to garbage collect terminal allocation directories. + +- `gc_disk_usage_threshold` `(float: 80)` - Specifies the disk usage percent which + Nomad tries to maintain by garbage collecting terminal allocations. + +- `gc_inode_usage_threshold` `(float: 70)` - Specifies the inode usage percent + which Nomad tries to maintain by garbage collecting terminal allocations. 
+ ### `chroot_env` Parameters Drivers based on [isolated fork/exec](/docs/drivers/exec.html) implement file