From 8eeb0755be6b2b83a20a9af7cf2184c417de47f8 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Thu, 1 Mar 2018 20:41:22 -0500 Subject: [PATCH 1/5] Add grace period calculation logic to renewer --- api/renewer.go | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/api/renewer.go b/api/renewer.go index 2a72ebe2ef66..ce41e3cd33ea 100644 --- a/api/renewer.go +++ b/api/renewer.go @@ -5,6 +5,8 @@ import ( "math/rand" "sync" "time" + + uuid "github.com/hashicorp/go-uuid" ) var ( @@ -13,9 +15,6 @@ var ( ErrRenewerNotRenewable = errors.New("secret is not renewable") ErrRenewerNoSecretData = errors.New("returned empty secret data") - // DefaultRenewerGrace is the default grace period - DefaultRenewerGrace = 15 * time.Second - // DefaultRenewerRenewBuffer is the default size of the buffer for renew // messages on the channel. DefaultRenewerRenewBuffer = 5 @@ -111,9 +110,6 @@ func (c *Client) NewRenewer(i *RenewerInput) (*Renewer, error) { } grace := i.Grace - if grace == 0 { - grace = DefaultRenewerGrace - } random := i.Rand if random == nil { @@ -241,6 +237,9 @@ func (r *Renewer) renewLease() error { return ErrRenewerNotRenewable } + priorDuration := time.Duration(r.secret.LeaseDuration) * time.Second + r.calculateGrace(priorDuration) + client, leaseID := r.client, r.secret.LeaseID for { @@ -277,8 +276,16 @@ func (r *Renewer) renewLease() error { leaseDuration := time.Duration(renewal.LeaseDuration) * time.Second sleepDuration := r.sleepDuration(leaseDuration) + // We keep evaluating a new grace period so long as the lease is + // extending. Once it stops extending, we've hit the max and need to + // rely on the grace duration. + if leaseDuration > priorDuration { + r.calculateGrace(leaseDuration) + } + priorDuration = leaseDuration + // If we are within grace, return now. - if leaseDuration <= r.grace || sleepDuration <= r.grace { + if leaseDuration <= r.grace { return nil } @@ -307,3 +314,24 @@ func (r *Renewer) sleepDuration(base time.Duration) time.Duration { return time.Duration(sleep) } + +// calculateGrace calculates the grace period based on a reasonable set of +// assumptions given the total lease time; it also adds some jitter to not have +// clients be in sync. We calculate this continuously so long as the new lease +// duration is greater than the previous. +func (r *Renewer) calculateGrace(leaseDuration time.Duration) { + leaseNanos := float64(leaseDuration.Nanoseconds()) + b, err := uuid.GenerateRandomBytes(1) + if err != nil || len(b) != 1 { + r.grace = time.Duration(0.9 * leaseNanos) + return + } + + var skew float64 = (float64(b[0]) - 128) / 128.0 + switch skew { + case 0.0: + r.grace = time.Duration(0.9 * leaseNanos) + default: + r.grace = time.Duration(0.9*leaseNanos + 0.05*leaseNanos*skew) + } +} From 8f97444156bd2005090287c4fb5ef3ddcc3ec229 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Fri, 2 Mar 2018 14:10:02 -0500 Subject: [PATCH 2/5] Update lease renewer logic. It is believed by myself and members of the Nomad team that this logic should be much more robust in terms of causing large numbers of new secret acquisitions caused by a static grace period. See comments in the code for details. Fixes #3414 --- api/renewer.go | 69 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/api/renewer.go b/api/renewer.go index ce41e3cd33ea..970ff25e1d01 100644 --- a/api/renewer.go +++ b/api/renewer.go @@ -5,8 +5,6 @@ import ( "math/rand" "sync" "time" - - uuid "github.com/hashicorp/go-uuid" ) var ( @@ -180,6 +178,9 @@ func (r *Renewer) renewAuth() error { return ErrRenewerNotRenewable } + priorDuration := time.Duration(r.secret.LeaseDuration) * time.Second + r.calculateGrace(priorDuration) + client, token := r.client, r.secret.Auth.ClientToken for { @@ -212,13 +213,28 @@ func (r *Renewer) renewAuth() error { return ErrRenewerNotRenewable } - // Grab the lease duration and sleep duration - note that we grab the auth - // lease duration, not the secret lease duration. + // Grab the lease duration leaseDuration := time.Duration(renewal.Auth.LeaseDuration) * time.Second - sleepDuration := r.sleepDuration(leaseDuration) - // If we are within grace, return now. - if leaseDuration <= r.grace || sleepDuration <= r.grace { + // We keep evaluating a new grace period so long as the lease is + // extending. Once it stops extending, we've hit the max and need to + // rely on the grace duration. + if leaseDuration > priorDuration { + r.calculateGrace(leaseDuration) + } + priorDuration = leaseDuration + + // The sleep duration is set to 2/3 of the current lease duration plus + // 1/3 of the current grace period, which adds jitter. + sleepDuration := time.Duration(float64(leaseDuration.Nanoseconds())*2/3 + float64(r.grace.Nanoseconds()*1/3)) + + // If we are within grace, return now; or, if the amount of time we + // would sleep would land us in the grace period. This helps with short + // tokens; for example, you don't want a current lease duration of 4 + // seconds, a grace period of 3 seconds, and end up sleeping for more + // than three of those seconds and having a very small budget of time + // to renew. + if leaseDuration <= r.grace || leaseDuration-sleepDuration <= r.grace { return nil } @@ -272,9 +288,8 @@ func (r *Renewer) renewLease() error { return ErrRenewerNotRenewable } - // Grab the lease duration and sleep duration + // Grab the lease duration leaseDuration := time.Duration(renewal.LeaseDuration) * time.Second - sleepDuration := r.sleepDuration(leaseDuration) // We keep evaluating a new grace period so long as the lease is // extending. Once it stops extending, we've hit the max and need to @@ -284,8 +299,17 @@ func (r *Renewer) renewLease() error { } priorDuration = leaseDuration - // If we are within grace, return now. - if leaseDuration <= r.grace { + // The sleep duration is set to 2/3 of the current lease duration plus + // 1/3 of the current grace period, which adds jitter. + sleepDuration := time.Duration(float64(leaseDuration.Nanoseconds())*2/3 + float64(r.grace.Nanoseconds()*1/3)) + + // If we are within grace, return now; or, if the amount of time we + // would sleep would land us in the grace period. This helps with short + // tokens; for example, you don't want a current lease duration of 4 + // seconds, a grace period of 3 seconds, and end up sleeping for more + // than three of those seconds and having a very small budget of time + // to renew. + if leaseDuration <= r.grace || leaseDuration-sleepDuration <= r.grace { return nil } @@ -318,20 +342,19 @@ func (r *Renewer) sleepDuration(base time.Duration) time.Duration { // calculateGrace calculates the grace period based on a reasonable set of // assumptions given the total lease time; it also adds some jitter to not have // clients be in sync. We calculate this continuously so long as the new lease -// duration is greater than the previous. +// duration is greater than the previous; no change means we don't need to +// recalculate, and if the lease duration keeps decreasing we've hit max and +// want to be able to rely on this. func (r *Renewer) calculateGrace(leaseDuration time.Duration) { - leaseNanos := float64(leaseDuration.Nanoseconds()) - b, err := uuid.GenerateRandomBytes(1) - if err != nil || len(b) != 1 { - r.grace = time.Duration(0.9 * leaseNanos) + if leaseDuration == 0 { + r.grace = 0 return } - var skew float64 = (float64(b[0]) - 128) / 128.0 - switch skew { - case 0.0: - r.grace = time.Duration(0.9 * leaseNanos) - default: - r.grace = time.Duration(0.9*leaseNanos + 0.05*leaseNanos*skew) - } + leaseNanos := float64(leaseDuration.Nanoseconds()) + jitterMax := 0.1 * leaseNanos + + // For a given lease duration, we want to allow 80-90% of that to elapse, + // so the remaining amount is the grace period + r.grace = time.Duration(leaseNanos*0.1) + time.Duration(uint64(r.random.Int63())%uint64(jitterMax)) } From 16310366f03bca50cd02baa3cfd152a09d2bce74 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Thu, 15 Mar 2018 19:20:25 -0400 Subject: [PATCH 3/5] Fix some commenting and fix tests --- api/renewer.go | 5 +--- api/renewer_integration_test.go | 42 ++++++++++++++++++++------------- api/renewer_test.go | 1 - 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/api/renewer.go b/api/renewer.go index 2749f7213adb..86107555db17 100644 --- a/api/renewer.go +++ b/api/renewer.go @@ -341,10 +341,7 @@ func (r *Renewer) sleepDuration(base time.Duration) time.Duration { // calculateGrace calculates the grace period based on a reasonable set of // assumptions given the total lease time; it also adds some jitter to not have -// clients be in sync. We calculate this continuously so long as the new lease -// duration is greater than the previous; no change means we don't need to -// recalculate, and if the lease duration keeps decreasing we've hit max and -// want to be able to rely on this. +// clients be in sync. func (r *Renewer) calculateGrace(leaseDuration time.Duration) { if leaseDuration == 0 { r.grace = 0 diff --git a/api/renewer_integration_test.go b/api/renewer_integration_test.go index 50a775e1261c..3cc1ec05b387 100644 --- a/api/renewer_integration_test.go +++ b/api/renewer_integration_test.go @@ -146,15 +146,20 @@ func TestRenewer_Renew(t *testing.T) { t.Errorf("no renewal") } - select { - case err := <-v.DoneCh(): - if err != nil { - t.Fatal(err) + outer: + for { + select { + case err := <-v.DoneCh(): + if err != nil { + t.Fatal(err) + } + break outer + case <-v.RenewCh(): + continue outer + case <-time.After(3 * time.Second): + t.Errorf("no data") + break outer } - case renew := <-v.RenewCh(): - t.Fatalf("should not have renewed (lease should be up): %#v", renew) - case <-time.After(3 * time.Second): - t.Errorf("no data") } }) @@ -205,15 +210,20 @@ func TestRenewer_Renew(t *testing.T) { t.Errorf("no renewal") } - select { - case err := <-v.DoneCh(): - if err != nil { - t.Fatal(err) + outer: + for { + select { + case err := <-v.DoneCh(): + if err != nil { + t.Fatal(err) + } + break outer + case <-v.RenewCh(): + continue outer + case <-time.After(3 * time.Second): + t.Errorf("no data") + break outer } - case renew := <-v.RenewCh(): - t.Fatalf("should not have renewed (lease should be up): %#v", renew) - case <-time.After(3 * time.Second): - t.Errorf("no data") } }) }) diff --git a/api/renewer_test.go b/api/renewer_test.go index 262484e0fa01..d0b828b451ed 100644 --- a/api/renewer_test.go +++ b/api/renewer_test.go @@ -41,7 +41,6 @@ func TestRenewer_NewRenewer(t *testing.T) { }, &Renewer{ secret: &Secret{}, - grace: DefaultRenewerGrace, }, false, }, From c4713e0f647a97cc843a045efb9c92eea31eb721 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Fri, 16 Mar 2018 10:05:25 -0400 Subject: [PATCH 4/5] Add more time to test so that integ tests don't time out --- api/renewer_integration_test.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/api/renewer_integration_test.go b/api/renewer_integration_test.go index 3cc1ec05b387..e3b83e93fbee 100644 --- a/api/renewer_integration_test.go +++ b/api/renewer_integration_test.go @@ -109,8 +109,8 @@ func TestRenewer_Renew(t *testing.T) { "creation_statements": `` + `CREATE ROLE "{{name}}" WITH LOGIN PASSWORD '{{password}}' VALID UNTIL '{{expiration}}';` + `GRANT SELECT ON ALL TABLES IN SCHEMA public TO "{{name}}";`, - "default_ttl": "1s", - "max_ttl": "3s", + "default_ttl": "5s", + "max_ttl": "10s", }); err != nil { t.Fatal(err) } @@ -139,10 +139,10 @@ func TestRenewer_Renew(t *testing.T) { if !renew.Secret.Renewable { t.Errorf("expected lease to be renewable: %#v", renew) } - if renew.Secret.LeaseDuration > 2 { - t.Errorf("expected lease to < 2s: %#v", renew) + if renew.Secret.LeaseDuration > 5 { + t.Errorf("expected lease to <= 5s: %#v", renew) } - case <-time.After(3 * time.Second): + case <-time.After(5 * time.Second): t.Errorf("no renewal") } @@ -154,9 +154,10 @@ func TestRenewer_Renew(t *testing.T) { t.Fatal(err) } break outer - case <-v.RenewCh(): + case renew := <-v.RenewCh(): + t.Logf("renew called, remaining lease duration: %d", renew.Secret.LeaseDuration) continue outer - case <-time.After(3 * time.Second): + case <-time.After(5 * time.Second): t.Errorf("no data") break outer } From 2a5da570ca31a6e0ed326fc246d6354a08bd4ec8 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Mon, 19 Mar 2018 11:39:21 -0400 Subject: [PATCH 5/5] Fix some review feedback --- api/renewer.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/api/renewer.go b/api/renewer.go index 86107555db17..7fd1de7db20d 100644 --- a/api/renewer.go +++ b/api/renewer.go @@ -178,7 +178,7 @@ func (r *Renewer) renewAuth() error { return ErrRenewerNotRenewable } - priorDuration := time.Duration(r.secret.LeaseDuration) * time.Second + priorDuration := time.Duration(r.secret.Auth.LeaseDuration) * time.Second r.calculateGrace(priorDuration) client, token := r.client, r.secret.Auth.ClientToken @@ -226,7 +226,7 @@ func (r *Renewer) renewAuth() error { // The sleep duration is set to 2/3 of the current lease duration plus // 1/3 of the current grace period, which adds jitter. - sleepDuration := time.Duration(float64(leaseDuration.Nanoseconds())*2/3 + float64(r.grace.Nanoseconds()*1/3)) + sleepDuration := time.Duration(float64(leaseDuration.Nanoseconds())*2/3 + float64(r.grace.Nanoseconds())/3) // If we are within grace, return now; or, if the amount of time we // would sleep would land us in the grace period. This helps with short @@ -301,7 +301,7 @@ func (r *Renewer) renewLease() error { // The sleep duration is set to 2/3 of the current lease duration plus // 1/3 of the current grace period, which adds jitter. - sleepDuration := time.Duration(float64(leaseDuration.Nanoseconds())*2/3 + float64(r.grace.Nanoseconds()*1/3)) + sleepDuration := time.Duration(float64(leaseDuration.Nanoseconds())*2/3 + float64(r.grace.Nanoseconds())/3) // If we are within grace, return now; or, if the amount of time we // would sleep would land us in the grace period. This helps with short @@ -353,5 +353,5 @@ func (r *Renewer) calculateGrace(leaseDuration time.Duration) { // For a given lease duration, we want to allow 80-90% of that to elapse, // so the remaining amount is the grace period - r.grace = time.Duration(leaseNanos*0.1) + time.Duration(uint64(r.random.Int63())%uint64(jitterMax)) + r.grace = time.Duration(jitterMax) + time.Duration(uint64(r.random.Int63())%uint64(jitterMax)) }