Skip to content

Commit

Permalink
Merge pull request #5479 from hashicorp/b-vault-renewal
Browse files Browse the repository at this point in the history
vault: fix renewal time
  • Loading branch information
schmichael authored Apr 16, 2019
2 parents eeb282c + 888304b commit 009b750
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 27 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@ FEATURES:

* vault: Add initial support for Vault namespaces [[GH-5520](https://github.com/hashicorp/nomad/pull/5520)]
* allocations: Add support for restarting allocations in-place [[GH-5502](https://github.com/hashicorp/nomad/pull/5502)]

IMPROVEMENTS:

* core: Add node name to output of `nomad node status` command in verbose mode [[GH-5224](https://github.com/hashicorp/nomad/pull/5224)]
* core: Add `-verbose` flag to `nomad status` wrapper command [[GH-5516](https://github.com/hashicorp/nomad/pull/5516)]

BUG FIXES:

* vault: Fix renewal time to be 1/2 lease duration with jitter [[GH-5479](https://github.com/hashicorp/nomad/issues/5479)]

## 0.9.0 (April 9, 2019)

__BACKWARDS INCOMPATIBILITIES:__
Expand Down
68 changes: 46 additions & 22 deletions client/vaultclient/vaultclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,28 +194,36 @@ func (c *vaultClient) isTracked(id string) bool {
return ok
}

// isRunning returns true if the client is running.
func (c *vaultClient) isRunning() bool {
c.lock.RLock()
defer c.lock.RUnlock()
return c.running
}

// Starts the renewal loop of vault client
func (c *vaultClient) Start() {
c.lock.Lock()
defer c.lock.Unlock()

if !c.config.IsEnabled() || c.running {
return
}

c.lock.Lock()
c.running = true
c.lock.Unlock()

go c.run()
}

// Stops the renewal loop of vault client
func (c *vaultClient) Stop() {
c.lock.Lock()
defer c.lock.Unlock()

if !c.config.IsEnabled() || !c.running {
return
}

c.lock.Lock()
defer c.lock.Unlock()

c.running = false
close(c.stopCh)
}
Expand All @@ -235,7 +243,7 @@ func (c *vaultClient) DeriveToken(alloc *structs.Allocation, taskNames []string)
if !c.config.IsEnabled() {
return nil, fmt.Errorf("vault client not enabled")
}
if !c.running {
if !c.isRunning() {
return nil, fmt.Errorf("vault client is not running")
}

Expand Down Expand Up @@ -421,21 +429,9 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error {
}
}

duration := leaseDuration / 2
switch {
case leaseDuration < 30:
// Don't bother about introducing randomness if the
// leaseDuration is too small.
default:
// Give a breathing space of 20 seconds
min := 10
max := leaseDuration - min
rand.Seed(time.Now().Unix())
duration = min + rand.Intn(max-min)
}

// Determine the next renewal time
next := time.Now().Add(time.Duration(duration) * time.Second)
renewalDuration := renewalTime(rand.Intn, leaseDuration)
next := time.Now().Add(renewalDuration)

fatal := false
if renewalErr != nil &&
Expand All @@ -445,7 +441,7 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error {
strings.Contains(renewalErr.Error(), "permission denied")) {
fatal = true
} else if renewalErr != nil {
c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "duration", duration)
c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "renewal_duration", renewalDuration)
c.logger.Error("error during renewal of lease or token failed due to a non-fatal error; retrying",
"error", renewalErr, "period", next)
}
Expand Down Expand Up @@ -517,7 +513,7 @@ func (c *vaultClient) run() {
}

var renewalCh <-chan time.Time
for c.config.IsEnabled() && c.running {
for c.config.IsEnabled() && c.isRunning() {
// Fetches the candidate for next renewal
renewalReq, renewalTime := c.nextRenewal()
if renewalTime.IsZero() {
Expand Down Expand Up @@ -728,3 +724,31 @@ func (h *vaultDataHeapImp) Pop() interface{} {
*h = old[0 : n-1]
return entry
}

// randIntn is the function in math/rand needed by renewalTime. A type is used
// to ease deterministic testing.
type randIntn func(int) int

// renewalTime returns when a token should be renewed given its leaseDuration
// and a randomizer to provide jitter.
//
// Leases < 1m will be not jitter.
func renewalTime(dice randIntn, leaseDuration int) time.Duration {
// Start trying to renew at half the lease duration to allow ample time
// for latency and retries.
renew := leaseDuration / 2

// Don't bother about introducing randomness if the
// leaseDuration is too small.
const cutoff = 30
if renew < cutoff {
return time.Duration(renew) * time.Second
}

// jitter is the amount +/- to vary the renewal time
const jitter = 10
min := renew - jitter
renew = min + dice(jitter*2)

return time.Duration(renew) * time.Second
}
56 changes: 52 additions & 4 deletions client/vaultclient/vaultclient_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/hashicorp/nomad/testutil"
vaultapi "github.com/hashicorp/vault/api"
vaultconsts "github.com/hashicorp/vault/helper/consts"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -76,8 +77,11 @@ func TestVaultClient_TokenRenewals(t *testing.T) {
}(errCh)
}

if c.heap.Length() != num {
t.Fatalf("bad: heap length: expected: %d, actual: %d", num, c.heap.Length())
c.lock.Lock()
length := c.heap.Length()
c.lock.Unlock()
if length != num {
t.Fatalf("bad: heap length: expected: %d, actual: %d", num, length)
}

time.Sleep(time.Duration(testutil.TestMultiplier()) * time.Second)
Expand All @@ -88,8 +92,11 @@ func TestVaultClient_TokenRenewals(t *testing.T) {
}
}

if c.heap.Length() != 0 {
t.Fatalf("bad: heap length: expected: 0, actual: %d", c.heap.Length())
c.lock.Lock()
length = c.heap.Length()
c.lock.Unlock()
if length != 0 {
t.Fatalf("bad: heap length: expected: 0, actual: %d", length)
}
}

Expand Down Expand Up @@ -300,3 +307,44 @@ func TestVaultClient_RenewNonexistentLease(t *testing.T) {
t.Fatalf("expected \"%s\" or \"%s\" in error message, got \"%v\"", "lease not found", "lease is not renewable", err.Error())
}
}

// TestVaultClient_RenewalTime_Long asserts that for leases over 1m the renewal
// time is jittered.
func TestVaultClient_RenewalTime_Long(t *testing.T) {
t.Parallel()

// highRoller is a randIntn func that always returns the max value
highRoller := func(n int) int {
return n - 1
}

// lowRoller is a randIntn func that always returns the min value (0)
lowRoller := func(int) int {
return 0
}

assert.Equal(t, 39*time.Second, renewalTime(highRoller, 60))
assert.Equal(t, 20*time.Second, renewalTime(lowRoller, 60))

assert.Equal(t, 309*time.Second, renewalTime(highRoller, 600))
assert.Equal(t, 290*time.Second, renewalTime(lowRoller, 600))

const days3 = 60 * 60 * 24 * 3
assert.Equal(t, (days3/2+9)*time.Second, renewalTime(highRoller, days3))
assert.Equal(t, (days3/2-10)*time.Second, renewalTime(lowRoller, days3))
}

// TestVaultClient_RenewalTime_Short asserts that for leases under 1m the renewal
// time is lease/2.
func TestVaultClient_RenewalTime_Short(t *testing.T) {
t.Parallel()

dice := func(int) int {
require.Fail(t, "dice should not have been called")
panic("unreachable")
}

assert.Equal(t, 29*time.Second, renewalTime(dice, 58))
assert.Equal(t, 15*time.Second, renewalTime(dice, 30))
assert.Equal(t, 1*time.Second, renewalTime(dice, 2))
}

0 comments on commit 009b750

Please sign in to comment.