-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update lease renewer logic #4090
Changes from 5 commits
8eeb075
8f97444
9a14905
1631036
c4713e0
2a5da57
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,9 +13,6 @@ var ( | |
ErrRenewerNotRenewable = errors.New("secret is not renewable") | ||
ErrRenewerNoSecretData = errors.New("returned empty secret data") | ||
|
||
// DefaultRenewerGrace is the default grace period | ||
DefaultRenewerGrace = 15 * time.Second | ||
|
||
// DefaultRenewerRenewBuffer is the default size of the buffer for renew | ||
// messages on the channel. | ||
DefaultRenewerRenewBuffer = 5 | ||
|
@@ -111,9 +108,6 @@ func (c *Client) NewRenewer(i *RenewerInput) (*Renewer, error) { | |
} | ||
|
||
grace := i.Grace | ||
if grace == 0 { | ||
grace = DefaultRenewerGrace | ||
} | ||
|
||
random := i.Rand | ||
if random == nil { | ||
|
@@ -184,6 +178,9 @@ func (r *Renewer) renewAuth() error { | |
return ErrRenewerNotRenewable | ||
} | ||
|
||
priorDuration := time.Duration(r.secret.LeaseDuration) * time.Second | ||
r.calculateGrace(priorDuration) | ||
|
||
client, token := r.client, r.secret.Auth.ClientToken | ||
|
||
for { | ||
|
@@ -216,13 +213,28 @@ func (r *Renewer) renewAuth() error { | |
return ErrRenewerNotRenewable | ||
} | ||
|
||
// Grab the lease duration and sleep duration - note that we grab the auth | ||
// lease duration, not the secret lease duration. | ||
// Grab the lease duration | ||
leaseDuration := time.Duration(renewal.Auth.LeaseDuration) * time.Second | ||
sleepDuration := r.sleepDuration(leaseDuration) | ||
|
||
// If we are within grace, return now. | ||
if leaseDuration <= r.grace || sleepDuration <= r.grace { | ||
// We keep evaluating a new grace period so long as the lease is | ||
// extending. Once it stops extending, we've hit the max and need to | ||
// rely on the grace duration. | ||
if leaseDuration > priorDuration { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we calculate the new grace if the lease duration doesn't change, which could be a likely common thing? In other words, should this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the lease duration doesn't change, the grace period would be within the same parameters. Recalculating it would just shift the amount of random jitter, which if it's truly random won't either help or hurt, so can be skipped. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes sense 👍 |
||
r.calculateGrace(leaseDuration) | ||
} | ||
priorDuration = leaseDuration | ||
|
||
// The sleep duration is set to 2/3 of the current lease duration plus | ||
// 1/3 of the current grace period, which adds jitter. | ||
sleepDuration := time.Duration(float64(leaseDuration.Nanoseconds())*2/3 + float64(r.grace.Nanoseconds()*1/3)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitpick: The argument to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually this was subtly wrong, good catch. |
||
|
||
// If we are within grace, return now; or, if the amount of time we | ||
// would sleep would land us in the grace period. This helps with short | ||
// tokens; for example, you don't want a current lease duration of 4 | ||
// seconds, a grace period of 3 seconds, and end up sleeping for more | ||
// than three of those seconds and having a very small budget of time | ||
// to renew. | ||
if leaseDuration <= r.grace || leaseDuration-sleepDuration <= r.grace { | ||
return nil | ||
} | ||
|
||
|
@@ -241,6 +253,9 @@ func (r *Renewer) renewLease() error { | |
return ErrRenewerNotRenewable | ||
} | ||
|
||
priorDuration := time.Duration(r.secret.LeaseDuration) * time.Second | ||
r.calculateGrace(priorDuration) | ||
|
||
client, leaseID := r.client, r.secret.LeaseID | ||
|
||
for { | ||
|
@@ -273,12 +288,28 @@ func (r *Renewer) renewLease() error { | |
return ErrRenewerNotRenewable | ||
} | ||
|
||
// Grab the lease duration and sleep duration | ||
// Grab the lease duration | ||
leaseDuration := time.Duration(renewal.LeaseDuration) * time.Second | ||
sleepDuration := r.sleepDuration(leaseDuration) | ||
|
||
// If we are within grace, return now. | ||
if leaseDuration <= r.grace || sleepDuration <= r.grace { | ||
// We keep evaluating a new grace period so long as the lease is | ||
// extending. Once it stops extending, we've hit the max and need to | ||
// rely on the grace duration. | ||
if leaseDuration > priorDuration { | ||
r.calculateGrace(leaseDuration) | ||
} | ||
priorDuration = leaseDuration | ||
|
||
// The sleep duration is set to 2/3 of the current lease duration plus | ||
// 1/3 of the current grace period, which adds jitter. | ||
sleepDuration := time.Duration(float64(leaseDuration.Nanoseconds())*2/3 + float64(r.grace.Nanoseconds()*1/3)) | ||
|
||
// If we are within grace, return now; or, if the amount of time we | ||
// would sleep would land us in the grace period. This helps with short | ||
// tokens; for example, you don't want a current lease duration of 4 | ||
// seconds, a grace period of 3 seconds, and end up sleeping for more | ||
// than three of those seconds and having a very small budget of time | ||
// to renew. | ||
if leaseDuration <= r.grace || leaseDuration-sleepDuration <= r.grace { | ||
return nil | ||
} | ||
|
||
|
@@ -307,3 +338,20 @@ func (r *Renewer) sleepDuration(base time.Duration) time.Duration { | |
|
||
return time.Duration(sleep) | ||
} | ||
|
||
// calculateGrace calculates the grace period based on a reasonable set of | ||
// assumptions given the total lease time; it also adds some jitter to not have | ||
// clients be in sync. | ||
func (r *Renewer) calculateGrace(leaseDuration time.Duration) { | ||
if leaseDuration == 0 { | ||
r.grace = 0 | ||
return | ||
} | ||
|
||
leaseNanos := float64(leaseDuration.Nanoseconds()) | ||
jitterMax := 0.1 * leaseNanos | ||
|
||
// For a given lease duration, we want to allow 80-90% of that to elapse, | ||
// so the remaining amount is the grace period | ||
r.grace = time.Duration(leaseNanos*0.1) + time.Duration(uint64(r.random.Int63())%uint64(jitterMax)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we use the initialized There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,8 +109,8 @@ func TestRenewer_Renew(t *testing.T) { | |
"creation_statements": `` + | ||
`CREATE ROLE "{{name}}" WITH LOGIN PASSWORD '{{password}}' VALID UNTIL '{{expiration}}';` + | ||
`GRANT SELECT ON ALL TABLES IN SCHEMA public TO "{{name}}";`, | ||
"default_ttl": "1s", | ||
"max_ttl": "3s", | ||
"default_ttl": "5s", | ||
"max_ttl": "10s", | ||
}); err != nil { | ||
t.Fatal(err) | ||
} | ||
|
@@ -139,22 +139,28 @@ func TestRenewer_Renew(t *testing.T) { | |
if !renew.Secret.Renewable { | ||
t.Errorf("expected lease to be renewable: %#v", renew) | ||
} | ||
if renew.Secret.LeaseDuration > 2 { | ||
t.Errorf("expected lease to < 2s: %#v", renew) | ||
if renew.Secret.LeaseDuration > 5 { | ||
t.Errorf("expected lease to <= 5s: %#v", renew) | ||
} | ||
case <-time.After(3 * time.Second): | ||
case <-time.After(5 * time.Second): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, because for any given renewal it should use the default lease so we should expect activity within the default lease period, not max lease period. |
||
t.Errorf("no renewal") | ||
} | ||
|
||
select { | ||
case err := <-v.DoneCh(): | ||
if err != nil { | ||
t.Fatal(err) | ||
outer: | ||
for { | ||
select { | ||
case err := <-v.DoneCh(): | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
break outer | ||
case renew := <-v.RenewCh(): | ||
t.Logf("renew called, remaining lease duration: %d", renew.Secret.LeaseDuration) | ||
continue outer | ||
case <-time.After(5 * time.Second): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here. |
||
t.Errorf("no data") | ||
break outer | ||
} | ||
case renew := <-v.RenewCh(): | ||
t.Fatalf("should not have renewed (lease should be up): %#v", renew) | ||
case <-time.After(3 * time.Second): | ||
t.Errorf("no data") | ||
} | ||
}) | ||
|
||
|
@@ -205,15 +211,20 @@ func TestRenewer_Renew(t *testing.T) { | |
t.Errorf("no renewal") | ||
} | ||
|
||
select { | ||
case err := <-v.DoneCh(): | ||
if err != nil { | ||
t.Fatal(err) | ||
outer: | ||
for { | ||
select { | ||
case err := <-v.DoneCh(): | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
break outer | ||
case <-v.RenewCh(): | ||
continue outer | ||
case <-time.After(3 * time.Second): | ||
t.Errorf("no data") | ||
break outer | ||
} | ||
case renew := <-v.RenewCh(): | ||
t.Fatalf("should not have renewed (lease should be up): %#v", renew) | ||
case <-time.After(3 * time.Second): | ||
t.Errorf("no data") | ||
} | ||
}) | ||
}) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be
r.secret.Auth.LeaseDuration
instead ofr.secret.LeaseDuration
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes! Good catch.