Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Making GC related fields tunable #2261

Merged
merged 4 commits into from
Feb 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,10 +225,18 @@ func NewClient(cfg *config.Config, consulSyncer *consul.Syncer, logger *log.Logg
return nil, fmt.Errorf("failed to initialize client: %v", err)
}

// Add the stats collector and the garbage collector
// Add the stats collector
statsCollector := stats.NewHostStatsCollector(logger, c.config.AllocDir)
c.hostStatsCollector = statsCollector
c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, cfg.Node.Reserved.DiskMB)

// Add the garbage collector
gcConfig := &GCConfig{
DiskUsageThreshold: cfg.GCDiskUsageThreshold,
InodeUsageThreshold: cfg.GCInodeUsageThreshold,
Interval: cfg.GCInterval,
ReservedDiskMB: cfg.Node.Reserved.DiskMB,
}
c.garbageCollector = NewAllocGarbageCollector(logger, statsCollector, gcConfig)

// Setup the node
if err := c.setupNode(); err != nil {
Expand Down
15 changes: 15 additions & 0 deletions client/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,18 @@ type Config struct {
// TLSConfig holds various TLS related configurations
TLSConfig *config.TLSConfig

// GCInterval is the time interval at which the client triggers garbage
// collection
GCInterval time.Duration
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Website docs


// GCDiskUsageThreshold is the disk usage threshold beyond which the Nomad
// client triggers GC of terminal allocations
GCDiskUsageThreshold float64

// GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad
// client triggers GC of the terminal allocations
GCInodeUsageThreshold float64

// LogLevel is the level of the logs to output
LogLevel string
}
Expand All @@ -177,6 +189,9 @@ func DefaultConfig() *Config {
StatsCollectionInterval: 1 * time.Second,
TLSConfig: &config.TLSConfig{},
LogLevel: "DEBUG",
GCInterval: 1 * time.Minute,
GCDiskUsageThreshold: 80,
GCInodeUsageThreshold: 70,
}
}

Expand Down
36 changes: 16 additions & 20 deletions client/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,6 @@ import (
)

const (
// diskUsageThreshold is the percent of used disk space beyond which Nomad
// GCs terminated allocations
diskUsageThreshold = 80

// gcInterval is the interval at which Nomad runs the garbage collector
gcInterval = 1 * time.Minute

// inodeUsageThreshold is the percent of inode usage that Nomad tries to
// maintain, whenever we are over it we will attempt to GC terminal
// allocations
inodeUsageThreshold = 70

// MB is a constant which converts values in bytes to MB
MB = 1024 * 1024
)
Expand Down Expand Up @@ -134,22 +122,30 @@ func (i *IndexedGCAllocPQ) Length() int {
return len(i.heap)
}

// GCConfig allows changing the behaviour of the garbage collector. It is
// handed to NewAllocGarbageCollector and read by the collector whenever it
// decides whether terminal allocations need to be garbage collected.
type GCConfig struct {
// DiskUsageThreshold is the disk usage percent that is compared against the
// collected disk stats' UsedPercent; the collector stops garbage collecting
// once usage is at or below this value (and the inode threshold is also met).
DiskUsageThreshold float64
// InodeUsageThreshold is the inode usage percent that is compared against
// the collected disk stats' InodesUsedPercent; the collector stops garbage
// collecting once usage is at or below this value (and the disk threshold is
// also met).
InodeUsageThreshold float64
// Interval is the period of the ticker that drives the collector's
// background run loop.
Interval time.Duration
// ReservedDiskMB is the amount of disk, in MB, that is subtracted from the
// alloc dir's available space when checking whether new allocations fit
// without garbage collection. The client populates it from the node's
// reserved DiskMB.
ReservedDiskMB int
}

// AllocGarbageCollector garbage collects terminated allocations on a node
type AllocGarbageCollector struct {
allocRunners *IndexedGCAllocPQ
statsCollector stats.NodeStatsCollector
reservedDiskMB int
config *GCConfig
logger *log.Logger
shutdownCh chan struct{}
}

// NewAllocGarbageCollector returns a garbage collector for terminated
// allocations on a node.
func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, reservedDiskMB int) *AllocGarbageCollector {
func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStatsCollector, config *GCConfig) *AllocGarbageCollector {
gc := &AllocGarbageCollector{
allocRunners: NewIndexedGCAllocPQ(),
statsCollector: statsCollector,
reservedDiskMB: reservedDiskMB,
config: config,
logger: logger,
shutdownCh: make(chan struct{}),
}
Expand All @@ -159,7 +155,7 @@ func NewAllocGarbageCollector(logger *log.Logger, statsCollector stats.NodeStats
}

func (a *AllocGarbageCollector) run() {
ticker := time.NewTicker(gcInterval)
ticker := time.NewTicker(a.config.Interval)
for {
select {
case <-ticker.C:
Expand Down Expand Up @@ -195,8 +191,8 @@ func (a *AllocGarbageCollector) keepUsageBelowThreshold() error {
break
}

if diskStats.UsedPercent <= diskUsageThreshold &&
diskStats.InodesUsedPercent <= inodeUsageThreshold {
if diskStats.UsedPercent <= a.config.DiskUsageThreshold &&
diskStats.InodesUsedPercent <= a.config.InodeUsageThreshold {
break
}

Expand Down Expand Up @@ -266,10 +262,10 @@ func (a *AllocGarbageCollector) MakeRoomFor(allocations []*structs.Allocation) e
// we don't need to garbage collect terminated allocations
if hostStats := a.statsCollector.Stats(); hostStats != nil {
var availableForAllocations uint64
if hostStats.AllocDirStats.Available < uint64(a.reservedDiskMB*MB) {
if hostStats.AllocDirStats.Available < uint64(a.config.ReservedDiskMB*MB) {
availableForAllocations = 0
} else {
availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.reservedDiskMB*MB)
availableForAllocations = hostStats.AllocDirStats.Available - uint64(a.config.ReservedDiskMB*MB)
}
if uint64(totalResource.DiskMB*MB) < availableForAllocations {
return nil
Expand Down
5 changes: 5 additions & 0 deletions command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,11 @@ func (a *Agent) clientConfig() (*clientconfig.Config, error) {
conf.TLSConfig = a.config.TLSConfig
conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP

// Set the GC related configs
conf.GCInterval = a.config.Client.GCInterval
conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold
conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold

return conf, nil
}

Expand Down
3 changes: 3 additions & 0 deletions command/agent/config-test-fixtures/basic.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ client {
data_points = 35
collection_interval = "5s"
}
gc_interval = "6s"
gc_disk_usage_threshold = 82
gc_inode_usage_threshold = 91
}
server {
enabled = true
Expand Down
28 changes: 23 additions & 5 deletions command/agent/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,18 @@ type ClientConfig struct {
// be used to target a certain utilization or to prevent Nomad from using a
// particular set of ports.
Reserved *Resources `mapstructure:"reserved"`

// GCInterval is the time interval at which the client triggers garbage
// collection
GCInterval time.Duration `mapstructure:"gc_interval"`

// GCDiskUsageThreshold is the disk usage threshold beyond which the Nomad
// client triggers GC of terminal allocations
GCDiskUsageThreshold float64 `mapstructure:"gc_disk_usage_threshold"`

// GCInodeUsageThreshold is the inode usage threshold beyond which the Nomad
// client triggers GC of the terminal allocations
GCInodeUsageThreshold float64 `mapstructure:"gc_inode_usage_threshold"`
}

// ServerConfig is configuration specific to the server mode
Expand Down Expand Up @@ -465,6 +477,9 @@ func DevConfig() *Config {
conf.Client.Options = map[string]string{
"driver.docker.volumes": "true",
}
conf.Client.GCInterval = 10 * time.Minute
conf.Client.GCDiskUsageThreshold = 99
conf.Client.GCInodeUsageThreshold = 99

return conf
}
Expand All @@ -487,11 +502,14 @@ func DefaultConfig() *Config {
Consul: config.DefaultConsulConfig(),
Vault: config.DefaultVaultConfig(),
Client: &ClientConfig{
Enabled: false,
MaxKillTimeout: "30s",
ClientMinPort: 14000,
ClientMaxPort: 14512,
Reserved: &Resources{},
Enabled: false,
MaxKillTimeout: "30s",
ClientMinPort: 14000,
ClientMaxPort: 14512,
Reserved: &Resources{},
GCInterval: 1 * time.Minute,
GCInodeUsageThreshold: 70,
GCDiskUsageThreshold: 80,
},
Server: &ServerConfig{
Enabled: false,
Expand Down
13 changes: 12 additions & 1 deletion command/agent/config_parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,9 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
"client_min_port",
"reserved",
"stats",
"gc_interval",
"gc_disk_usage_threshold",
"gc_inode_usage_threshold",
}
if err := checkHCLKeys(listVal, valid); err != nil {
return err
Expand All @@ -358,7 +361,15 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
delete(m, "stats")

var config ClientConfig
if err := mapstructure.WeakDecode(m, &config); err != nil {
dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
DecodeHook: mapstructure.StringToTimeDurationHookFunc(),
WeaklyTypedInput: true,
Result: &config,
})
if err != nil {
return err
}
if err := dec.Decode(m); err != nil {
return err
}

Expand Down
3 changes: 3 additions & 0 deletions command/agent/config_parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ func TestConfig_Parse(t *testing.T) {
ReservedPorts: "1,100,10-12",
ParsedReservedPorts: []int{1, 10, 11, 12, 100},
},
GCInterval: 6 * time.Second,
GCDiskUsageThreshold: 82,
GCInodeUsageThreshold: 91,
},
Server: &ServerConfig{
Enabled: true,
Expand Down
1 change: 1 addition & 0 deletions scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,4 @@ done
echo
echo "==> Results:"
tree pkg/

9 changes: 9 additions & 0 deletions website/source/docs/agent/configuration/client.html.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,15 @@ client {
[data_dir](/docs/agent/configuration/index.html#data_dir) suffixed with
"client", like `"/opt/nomad/client"`. This must be an absolute path.

- `gc_interval` `(string: "1m")` - Specifies the interval at which Nomad
attempts to garbage collect terminal allocation directories.

- `gc_disk_usage_threshold` `(float: 80)` - Specifies the disk usage percent which
Nomad tries to maintain by garbage collecting terminal allocations.

- `gc_inode_usage_threshold` `(float: 70)` - Specifies the inode usage percent
which Nomad tries to maintain by garbage collecting terminal allocations.

### `chroot_env` Parameters

Drivers based on [isolated fork/exec](/docs/drivers/exec.html) implement file
Expand Down