-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Cleaning of stale leases #2452
Cleaning of stale leases #2452
Changes from 41 commits
1e5d6e3
239bd1c
14aaa0a
3477038
e52625d
dca0d70
de1a2a0
65c63b4
b036478
711153d
0d629ff
0c65cd4
785177a
3fdf38a
98cdb68
2ef62fe
a8ef2c0
0892102
8c7b175
853233a
d07d3cb
79fc0d8
497bebe
aa08e5c
a3c2a42
b6843ec
0c02540
b3c6a56
415b0a2
0bda5a7
8d35f92
a2e431b
2d21bf6
5bc47b0
b0c4a7e
265b4cd
2f6e924
f1d2fc3
5320da0
cacf072
4de09fb
106f08a
7829107
1378dd5
5dde45d
e61298e
0e10477
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,14 +6,18 @@ import ( | |
"path" | ||
"strings" | ||
"sync" | ||
"sync/atomic" | ||
"time" | ||
|
||
"github.com/armon/go-metrics" | ||
log "github.com/mgutz/logxi/v1" | ||
|
||
"github.com/hashicorp/errwrap" | ||
multierror "github.com/hashicorp/go-multierror" | ||
"github.com/hashicorp/go-uuid" | ||
"github.com/hashicorp/vault/helper/consts" | ||
"github.com/hashicorp/vault/helper/jsonutil" | ||
"github.com/hashicorp/vault/helper/locksutil" | ||
"github.com/hashicorp/vault/logical" | ||
) | ||
|
||
|
@@ -57,6 +61,8 @@ type ExpirationManager struct { | |
|
||
pending map[string]*time.Timer | ||
pendingLock sync.Mutex | ||
|
||
tidyLock int64 | ||
} | ||
|
||
// NewExpirationManager creates a new ExpirationManager that is backed | ||
|
@@ -114,6 +120,112 @@ func (c *Core) stopExpiration() error { | |
return nil | ||
} | ||
|
||
// Tidy cleans up the dangling storage entries for leases. It scans the storage | ||
// view to find all the available leases, checks if the token embedded in it is | ||
// either empty or invalid and in both the cases, it revokes them. It also uses | ||
// a token cache to avoid multiple lookups of the same token ID. It is normally | ||
// not required to use the API that invokes this. This is only intended to | ||
// clean up the corrupt storage due to bugs. | ||
func (m *ExpirationManager) Tidy() error { | ||
var tidyErrors *multierror.Error | ||
|
||
if !atomic.CompareAndSwapInt64(&m.tidyLock, 0, 1) { | ||
m.logger.Debug("expiration: tidy operation on leases is already in progress") | ||
return fmt.Errorf("tidy operation on leases is already in progress") | ||
} | ||
|
||
defer atomic.CompareAndSwapInt64(&m.tidyLock, 1, 0) | ||
|
||
m.logger.Debug("expiration: beginning tidy operation on leases") | ||
|
||
// Create a cache to keep track of looked up tokens | ||
tokenCache := make(map[string]bool) | ||
countLease := 0 | ||
deletedCountEmptyToken := 0 | ||
deletedCountInvalidToken := 0 | ||
|
||
tidyFunc := func(leaseID string) { | ||
countLease++ | ||
if countLease%500 == 0 { | ||
m.logger.Debug("expiration: tidying leases", "progress", countLease) | ||
} | ||
|
||
le, err := m.loadEntry(leaseID) | ||
if err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it's worth adding a memoizing cache (a map used as a set would work) so that if we already know a token still exists, we don't have to look it up again. |
||
tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to load the lease ID %q: %v", leaseID, err)) | ||
return | ||
} | ||
|
||
if le == nil { | ||
tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("nil entry for lease ID %q: %v", leaseID, err)) | ||
return | ||
} | ||
|
||
var isValid, ok bool | ||
revokeLease := false | ||
if le.ClientToken == "" { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In this case we should probably revoke the lease as well. |
||
m.logger.Debug("expiration: revoking lease which has an empty token", "lease_id", leaseID) | ||
revokeLease = true | ||
deletedCountEmptyToken++ | ||
goto REVOKE_CHECK | ||
} | ||
|
||
isValid, ok = tokenCache[le.ClientToken] | ||
if !ok { | ||
saltedID := m.tokenStore.SaltID(le.ClientToken) | ||
lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken) | ||
lock.RLock() | ||
te, err := m.tokenStore.lookupSalted(saltedID, true) | ||
lock.RUnlock() | ||
|
||
if err != nil { | ||
tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to lookup token: %v", err)) | ||
return | ||
} | ||
|
||
if te == nil { | ||
m.logger.Debug("expiration: revoking lease which holds an invalid token", "lease_id", leaseID) | ||
revokeLease = true | ||
deletedCountInvalidToken++ | ||
tokenCache[le.ClientToken] = false | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not necessary, but I think you might as well add a |
||
} else { | ||
tokenCache[le.ClientToken] = true | ||
} | ||
} else { | ||
if isValid { | ||
return | ||
} else { | ||
m.logger.Debug("expiration: revoking lease which contains an invalid token", "lease_id", leaseID) | ||
deletedCountInvalidToken++ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same comment here. |
||
revokeLease = true | ||
} | ||
} | ||
|
||
REVOKE_CHECK: | ||
if revokeLease { | ||
// Force the revocation and skip going through the token store | ||
// again | ||
err = m.revokeCommon(leaseID, true, true) | ||
if err != nil { | ||
tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke an invalid lease with ID %q: %v", leaseID, err)) | ||
return | ||
} | ||
} | ||
} | ||
|
||
if err := logical.ScanView(m.idView, tidyFunc); err != nil { | ||
return err | ||
} | ||
|
||
m.logger.Debug("expiration: ending tidy operation on leases") | ||
|
||
m.logger.Debug("expiration: number of leases scanned", "total", countLease) | ||
m.logger.Debug("expiration: number of revoked leases which had empty tokens", "deleted_count", deletedCountEmptyToken) | ||
m.logger.Debug("expiration: number of revoked leases which had invalid tokens", "deleted_count", deletedCountInvalidToken) | ||
|
||
return tidyErrors.ErrorOrNil() | ||
} | ||
|
||
// Restore is used to recover the lease states when starting. | ||
// This is used after starting the vault. | ||
func (m *ExpirationManager) Restore() error { | ||
|
@@ -498,7 +610,7 @@ func (m *ExpirationManager) RenewToken(req *logical.Request, source string, toke | |
// Register is used to take a request and response with an associated | ||
// lease. The secret gets assigned a LeaseID and the management of | ||
// of lease is assumed by the expiration manager. | ||
func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Response) (string, error) { | ||
func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Response) (id string, retErr error) { | ||
defer metrics.MeasureSince([]string{"expire", "register"}, time.Now()) | ||
// Ignore if there is no leased secret | ||
if resp == nil || resp.Secret == nil { | ||
|
@@ -515,8 +627,38 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons | |
if err != nil { | ||
return "", err | ||
} | ||
|
||
leaseID := path.Join(req.Path, leaseUUID) | ||
|
||
defer func() { | ||
// If there is an error we want to rollback as much as possible (note | ||
// that errors here are ignored to do as much cleanup as we can). We | ||
// want to revoke a generated secret (since an error means we may not | ||
// be successfully tracking it), remove indexes, and delete the entry. | ||
if retErr != nil { | ||
revResp, err := m.router.Route(logical.RevokeRequest(req.Path, resp.Secret, resp.Data)) | ||
if err != nil { | ||
retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", err)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we distinguish the 4 error messages here so that, if we hit them, we know exactly where the error was caused? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
} else if revResp != nil && revResp.IsError() { | ||
retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", revResp.Error())) | ||
} | ||
|
||
if err := m.deleteEntry(leaseID); err != nil { | ||
retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", err)) | ||
} | ||
|
||
if err := m.removeIndexByToken(req.ClientToken, leaseID); err != nil { | ||
retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", err)) | ||
} | ||
} | ||
}() | ||
|
||
if req.ClientToken == "" { | ||
return "", fmt.Errorf("expiration: cannot register a lease with an empty client token") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we move the defer function to be after this check? There is no point in revoking a lease that didn't even get through input validation. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
} | ||
|
||
le := leaseEntry{ | ||
LeaseID: path.Join(req.Path, leaseUUID), | ||
LeaseID: leaseID, | ||
ClientToken: req.ClientToken, | ||
Path: req.Path, | ||
Data: resp.Data, | ||
|
@@ -548,6 +690,10 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons | |
func (m *ExpirationManager) RegisterAuth(source string, auth *logical.Auth) error { | ||
defer metrics.MeasureSince([]string{"expire", "register-auth"}, time.Now()) | ||
|
||
if auth.ClientToken == "" { | ||
return fmt.Errorf("expiration: cannot register an auth lease with an empty token") | ||
} | ||
|
||
// Create a lease entry | ||
le := leaseEntry{ | ||
LeaseID: path.Join(source, m.tokenStore.SaltID(auth.ClientToken)), | ||
|
@@ -668,7 +814,7 @@ func (m *ExpirationManager) revokeEntry(le *leaseEntry) error { | |
// Revocation of login tokens is special since we can by-pass the | ||
// backend and directly interact with the token store | ||
if le.Auth != nil { | ||
if err := m.tokenStore.RevokeTree(le.Auth.ClientToken); err != nil { | ||
if err := m.tokenStore.RevokeTree(le.ClientToken); err != nil { | ||
return fmt.Errorf("failed to revoke token: %v", err) | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add some progress logging so people can be aware things are still happening.