From 1e5d6e30313a405dddf022798bbc3c82a13a68ce Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Tue, 7 Mar 2017 15:22:21 -0500 Subject: [PATCH 01/41] Added sys/tidy-leases endpoint --- vault/expiration.go | 54 +++++++++++++++++++++++++++++++++++++++++ vault/logical_system.go | 29 ++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/vault/expiration.go b/vault/expiration.go index 87fceaba6755..249b0b3e86c1 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -11,9 +11,11 @@ import ( "github.com/armon/go-metrics" log "github.com/mgutz/logxi/v1" + multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-uuid" "github.com/hashicorp/vault/helper/consts" "github.com/hashicorp/vault/helper/jsonutil" + "github.com/hashicorp/vault/helper/locksutil" "github.com/hashicorp/vault/logical" ) @@ -114,6 +116,58 @@ func (c *Core) stopExpiration() error { return nil } +// Tidy cleans up stale leases which are associated with invalid tokens +func (m *ExpirationManager) Tidy() error { + var tidyErrors *multierror.Error + + tidyFunc := func(leaseID string) { + le, err := m.loadEntry(leaseID) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to load the lease ID %q: %v", leaseID, err)) + return + } + + if le == nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("nil entry for lease ID %q: %v", leaseID, err)) + return + } + + if le.ClientToken == "" { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("missing client token for lease ID %q: %v", leaseID, err)) + return + } + + saltedId := m.tokenStore.SaltID(le.ClientToken) + + lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken) + lock.RLock() + defer lock.RUnlock() + + te, err := m.tokenStore.lookupSalted(saltedId, true) + if te == nil { + // Force the revocation and skip going through the token store + // again + err = m.revokeCommon(leaseID, true, true) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke an invalid lease with ID %q: %v", leaseID, err)) + return + } + } + } + + if err := logical.ScanView(m.idView, tidyFunc); err != nil { + return err + } + + // If no errors were encountered, return a normal error instead of a + // multierror + if tidyErrors == nil { + return nil + } + + return tidyErrors +} + // Restore is used to recover the lease states when starting. // This is used after starting the vault. func (m *ExpirationManager) Restore() error { diff --git a/vault/logical_system.go b/vault/logical_system.go index 1645d1887e1c..dbd8d7dcbe80 100644 --- a/vault/logical_system.go +++ b/vault/logical_system.go @@ -72,6 +72,17 @@ func NewSystemBackend(core *Core, config *logical.BackendConfig) (logical.Backen }, Paths: []*framework.Path{ + &framework.Path{ + Pattern: "tidy-leases$", + + Callbacks: map[logical.Operation]framework.OperationFunc{ + logical.UpdateOperation: b.handleTidyLeases, + }, + + HelpSynopsis: strings.TrimSpace(sysHelp["tidy_leases"][0]), + HelpDescription: strings.TrimSpace(sysHelp["tidy_leases"][1]), + }, + &framework.Path{ Pattern: "capabilities-accessor$", @@ -710,6 +721,15 @@ type SystemBackend struct { Backend *framework.Backend } +func (b *SystemBackend) handleTidyLeases(req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + err := b.Core.expiration.Tidy() + if err != nil { + b.Backend.Logger().Error("sys: failed to tidy leases", "error", err) + return handleError(err) + } + return nil, err +} + func (b *SystemBackend) invalidate(key string) { if b.Core.logger.IsTrace() { b.Core.logger.Trace("sys: invaliding key", "key", key) @@ -2354,6 +2374,15 @@ Enable a new audit backend or disable an existing backend. on a given path.`, }, + "tidy_leases": { + `This endpoint performs cleanup tasks that can be run if certain error +conditions have occurred.`, + `This endpoint performs cleanup tasks that can be run to clean up the +lease entries after certain error conditions. Usually running this is not +necessary, and is only required if upgrade notes or support personnel suggest +it.`, + }, + "wrap": { "Response-wraps an arbitrary JSON object.", `Round trips the given input data into a response-wrapped token.`, From 239bd1c69b57d2166e6a7e146605ab3b9a010ba0 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Tue, 7 Mar 2017 16:06:05 -0500 Subject: [PATCH 02/41] Add locking where possible while doing auth/token/tidy --- vault/logical_system.go | 2 +- vault/token_store.go | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/vault/logical_system.go b/vault/logical_system.go index dbd8d7dcbe80..3c8596d6f2d3 100644 --- a/vault/logical_system.go +++ b/vault/logical_system.go @@ -2377,7 +2377,7 @@ Enable a new audit backend or disable an existing backend. "tidy_leases": { `This endpoint performs cleanup tasks that can be run if certain error conditions have occurred.`, - `This endpoint performs cleanup tasks that can be run to clean up the + `This endpoint performs cleanup tasks that can be run to clean up the lease entries after certain error conditions. Usually running this is not necessary, and is only required if upgrade notes or support personnel suggest it.`, diff --git a/vault/token_store.go b/vault/token_store.go index 37154dace07c..45754bfe6a11 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -10,9 +10,9 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-uuid" - "github.com/hashicorp/vault/helper/parseutil" "github.com/hashicorp/vault/helper/jsonutil" "github.com/hashicorp/vault/helper/locksutil" + "github.com/hashicorp/vault/helper/parseutil" "github.com/hashicorp/vault/helper/policyutil" "github.com/hashicorp/vault/helper/salt" "github.com/hashicorp/vault/helper/strutil" @@ -1128,7 +1128,8 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData for _, child := range children { // Look up tainted entries so we can be sure that if this isn't - // found, it doesn't exist + // found, it doesn't exist. Doing the following without locking + // since appropriate locks cannot be held with salted token IDs. te, _ := ts.lookupSalted(child, true) if te == nil { err = ts.view.Delete(parentPrefix + parent + "/" + child) @@ -1162,11 +1163,14 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData } } - saltedId := ts.SaltID(accessorEntry.TokenID) + lock := locksutil.LockForKey(ts.tokenLocks, accessorEntry.TokenID) + lock.RLock() // Look up tainted variants so we only find entries that truly don't // exist + saltedId := ts.SaltID(accessorEntry.TokenID) te, err := ts.lookupSalted(saltedId, true) + lock.RUnlock() // If token entry is not found assume that the token is not valid any // more and conclude that accessor, leases, and secondary index entries From 3477038c9fb04ae347d5f0c11017a91bd4957045 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Wed, 26 Apr 2017 15:07:58 -0400 Subject: [PATCH 03/41] Added atomic lock to ensure a single tidy operation is in progress --- vault/expiration.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 249b0b3e86c1..490e680c3c65 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -6,6 +6,7 @@ import ( "path" "strings" "sync" + "sync/atomic" "time" "github.com/armon/go-metrics" @@ -59,6 +60,8 @@ type ExpirationManager struct { pending map[string]*time.Timer pendingLock sync.Mutex + + tidyLock int64 } // NewExpirationManager creates a new ExpirationManager that is backed @@ -118,6 +121,7 @@ func (c *Core) stopExpiration() error { // Tidy cleans up stale leases which are associated with invalid tokens func (m *ExpirationManager) Tidy() error { + var tidyErrors *multierror.Error tidyFunc := func(leaseID string) { @@ -155,8 +159,13 @@ func (m *ExpirationManager) Tidy() error { } } - if err := logical.ScanView(m.idView, tidyFunc); err != nil { - return err + if atomic.CompareAndSwapInt64(&m.tidylock, 0, 1) { + defer atomic.CompareAndSwapInt64(&tidyLock, 1, 0) + if err := logical.ScanView(m.idView, tidyFunc); err != nil { + return err + } + } else { + return fmt.Errorf("tidy operation is already in progress") } // If no errors were encountered, return a normal error instead of a From e52625d3750a33d2704b363bde339e2715762c1d Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Wed, 26 Apr 2017 15:48:28 -0400 Subject: [PATCH 04/41] Revoke lease that has empty token; added logs --- vault/expiration.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 490e680c3c65..32388c474182 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -125,6 +125,8 @@ func (m *ExpirationManager) Tidy() error { var tidyErrors *multierror.Error tidyFunc := func(leaseID string) { + m.logger.Trace("expiration: checking if lease %q is valid", leaseID) + le, err := m.loadEntry(leaseID) if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to load the lease ID %q: %v", leaseID, err)) @@ -136,19 +138,26 @@ func (m *ExpirationManager) Tidy() error { return } + revokeLease := false if le.ClientToken == "" { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("missing client token for lease ID %q: %v", leaseID, err)) - return + m.logger.Debug("expiration: lease %q has an empty token", leaseID) + revokeLease = true } saltedId := m.tokenStore.SaltID(le.ClientToken) lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken) lock.RLock() - defer lock.RUnlock() - te, err := m.tokenStore.lookupSalted(saltedId, true) + lock.RUnlock() + if te == nil { + m.logger.Debug("expiration: lease %q has an invalid token", leaseID) + revokeLease = true + } + + if revokeLease { + m.logger.Debug("expiration: lease %q is being revoked", leaseID) // Force the revocation and skip going through the token store // again err = m.revokeCommon(leaseID, true, true) From dca0d703aa103017a4aa829edd2343d2c63ed8b9 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Wed, 26 Apr 2017 16:11:23 -0400 Subject: [PATCH 05/41] Added logger to token store and logs to tidy function --- vault/expiration.go | 4 ++-- vault/token_store.go | 19 +++++++++++++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 32388c474182..5bf4e6063ea2 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -168,8 +168,8 @@ func (m *ExpirationManager) Tidy() error { } } - if atomic.CompareAndSwapInt64(&m.tidylock, 0, 1) { - defer atomic.CompareAndSwapInt64(&tidyLock, 1, 0) + if atomic.CompareAndSwapInt64(&m.tidyLock, 0, 1) { + defer atomic.CompareAndSwapInt64(&m.tidyLock, 1, 0) if err := logical.ScanView(m.idView, tidyFunc); err != nil { return err } diff --git a/vault/token_store.go b/vault/token_store.go index c3126efc15d3..cc2c6eb36157 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -3,10 +3,13 @@ package vault import ( "encoding/json" "fmt" + "regexp" "strings" "time" + log "github.com/mgutz/logxi/v1" + "github.com/armon/go-metrics" "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-uuid" @@ -90,6 +93,8 @@ type TokenStore struct { tokenLocks []*locksutil.LockEntry cubbyholeDestroyer func(*TokenStore, string) error + + logger log.Logger } // NewTokenStore is used to construct a token store that is @@ -102,14 +107,11 @@ func NewTokenStore(c *Core, config *logical.BackendConfig) (*TokenStore, error) t := &TokenStore{ view: view, cubbyholeDestroyer: destroyCubbyhole, + logger: c.logger, + policyLookupFunc: c.policyStore.GetPolicy, + tokenLocks: locksutil.CreateLocks(), } - if c.policyStore != nil { - t.policyLookupFunc = c.policyStore.GetPolicy - } - - t.tokenLocks = locksutil.CreateLocks() - // Setup the framework endpoints t.Backend = &framework.Backend{ AuthRenew: t.authRenew, @@ -1127,11 +1129,13 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData } for _, child := range children { + ts.logger.Trace("token_store: checking if salted token %q is valid", child) // Look up tainted entries so we can be sure that if this isn't // found, it doesn't exist. Doing the following without locking // since appropriate locks cannot be held with salted token IDs. te, _ := ts.lookupSalted(child, true) if te == nil { + ts.logger.Debug("token_store: deleting invalid salted token %q", child) err = ts.view.Delete(parentPrefix + parent + "/" + child) if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err)) @@ -1144,6 +1148,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // a valid one. If not, delete the leases associated with that token // and delete the accessor as well. for _, saltedAccessor := range saltedAccessorList { + ts.logger.Trace("token_store: checking if salted accessor is valid") accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor) if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor entry: %v", err)) @@ -1154,6 +1159,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // in it. If not, it is an invalid accessor entry and needs to // be deleted. if accessorEntry.TokenID == "" { + ts.logger.Trace("token_store: deleting accessor with invalid token ID") // If deletion of accessor fails, move on to the next // item since this is just a best-effort operation err = ts.view.Delete(accessorPrefix + saltedAccessor) @@ -1176,6 +1182,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // more and conclude that accessor, leases, and secondary index entries // for this token should not exist as well. if te == nil { + ts.logger.Trace("token_store: deleting token which is not associated with a token entry; deleting accessor as well") // RevokeByToken expects a '*TokenEntry'. For the // purposes of tidying, it is sufficient if the token // entry only has ID set. From de1a2a01c3aa0a76410d0985b48355639237cb6c Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Wed, 26 Apr 2017 16:54:48 -0400 Subject: [PATCH 06/41] Added caching of looked up tokens --- vault/expiration.go | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 5bf4e6063ea2..900ac52938aa 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -121,9 +121,11 @@ func (c *Core) stopExpiration() error { // Tidy cleans up stale leases which are associated with invalid tokens func (m *ExpirationManager) Tidy() error { - var tidyErrors *multierror.Error + // Create a cache to keep track of looked up tokens + tokenCache := make(map[string]string) + tidyFunc := func(leaseID string) { m.logger.Trace("expiration: checking if lease %q is valid", leaseID) @@ -144,16 +146,32 @@ func (m *ExpirationManager) Tidy() error { revokeLease = true } - saltedId := m.tokenStore.SaltID(le.ClientToken) - - lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken) - lock.RLock() - te, err := m.tokenStore.lookupSalted(saltedId, true) - lock.RUnlock() + saltedID := m.tokenStore.SaltID(le.ClientToken) - if te == nil { + switch tokenCache[saltedID] { + case "invalid": m.logger.Debug("expiration: lease %q has an invalid token", leaseID) revokeLease = true + case "valid": + return + default: + lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken) + lock.RLock() + te, err := m.tokenStore.lookupSalted(saltedID, true) + lock.RUnlock() + + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to lookup token: %v", err)) + return + } + + if te == nil { + m.logger.Debug("expiration: lease %q has an invalid token", leaseID) + revokeLease = true + tokenCache[saltedID] = "invalid" + } else { + tokenCache[saltedID] = "valid" + } } if revokeLease { From 65c63b4140670af69fbfe2cb169de98c7b30a325 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Wed, 26 Apr 2017 17:17:19 -0400 Subject: [PATCH 07/41] Fix the log statements --- vault/expiration.go | 22 +++++++++++----------- vault/token_store.go | 11 ++++++----- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 900ac52938aa..43cfda63d728 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -127,7 +127,7 @@ func (m *ExpirationManager) Tidy() error { tokenCache := make(map[string]string) tidyFunc := func(leaseID string) { - m.logger.Trace("expiration: checking if lease %q is valid", leaseID) + m.logger.Trace("expiration: checking if lease is valid", "lease_id", leaseID) le, err := m.loadEntry(leaseID) if err != nil { @@ -142,19 +142,18 @@ func (m *ExpirationManager) Tidy() error { revokeLease := false if le.ClientToken == "" { - m.logger.Debug("expiration: lease %q has an empty token", leaseID) + m.logger.Debug("expiration: lease has an empty token", "lease_id", leaseID) revokeLease = true } - saltedID := m.tokenStore.SaltID(le.ClientToken) - - switch tokenCache[saltedID] { + switch tokenCache[le.ClientToken] { case "invalid": - m.logger.Debug("expiration: lease %q has an invalid token", leaseID) + m.logger.Debug("expiration: lease has an invalid token", "lease_id", leaseID) revokeLease = true case "valid": return default: + saltedID := m.tokenStore.SaltID(le.ClientToken) lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken) lock.RLock() te, err := m.tokenStore.lookupSalted(saltedID, true) @@ -166,16 +165,16 @@ func (m *ExpirationManager) Tidy() error { } if te == nil { - m.logger.Debug("expiration: lease %q has an invalid token", leaseID) + m.logger.Debug("expiration: lease has an invalid token", "lease_id", leaseID) revokeLease = true - tokenCache[saltedID] = "invalid" + tokenCache[le.ClientToken] = "invalid" } else { - tokenCache[saltedID] = "valid" + tokenCache[le.ClientToken] = "valid" } } if revokeLease { - m.logger.Debug("expiration: lease %q is being revoked", leaseID) + m.logger.Debug("expiration: lease is being revoked", "lease_id", leaseID) // Force the revocation and skip going through the token store // again err = m.revokeCommon(leaseID, true, true) @@ -192,7 +191,8 @@ func (m *ExpirationManager) Tidy() error { return err } } else { - return fmt.Errorf("tidy operation is already in progress") + m.logger.Debug("expiration: tidy operation on leases is already in progress") + return fmt.Errorf("tidy operation on leases is already in progress") } // If no errors were encountered, return a normal error instead of a diff --git a/vault/token_store.go b/vault/token_store.go index cc2c6eb36157..95a5be7a856f 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1129,13 +1129,13 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData } for _, child := range children { - ts.logger.Trace("token_store: checking if salted token %q is valid", child) + ts.logger.Trace("token_store: checking if token is valid", "salted_token", child) // Look up tainted entries so we can be sure that if this isn't // found, it doesn't exist. Doing the following without locking // since appropriate locks cannot be held with salted token IDs. te, _ := ts.lookupSalted(child, true) if te == nil { - ts.logger.Debug("token_store: deleting invalid salted token %q", child) + ts.logger.Debug("token_store: deleting invalid token", "salted_token", child) err = ts.view.Delete(parentPrefix + parent + "/" + child) if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err)) @@ -1148,7 +1148,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // a valid one. If not, delete the leases associated with that token // and delete the accessor as well. for _, saltedAccessor := range saltedAccessorList { - ts.logger.Trace("token_store: checking if salted accessor is valid") + ts.logger.Trace("token_store: checking if salted accessor is valid", "salted_accessor", saltedAccessor) accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor) if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor entry: %v", err)) @@ -1159,7 +1159,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // in it. If not, it is an invalid accessor entry and needs to // be deleted. if accessorEntry.TokenID == "" { - ts.logger.Trace("token_store: deleting accessor with invalid token ID") + ts.logger.Trace("token_store: deleting accessor with invalid token ID", "salted_accessor", saltedAccessor) // If deletion of accessor fails, move on to the next // item since this is just a best-effort operation err = ts.view.Delete(accessorPrefix + saltedAccessor) @@ -1182,7 +1182,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // more and conclude that accessor, leases, and secondary index entries // for this token should not exist as well. if te == nil { - ts.logger.Trace("token_store: deleting token which is not associated with a token entry; deleting accessor as well") + ts.logger.Trace("token_store: deleting token with nil entry", "salted_token", saltedId) // RevokeByToken expects a '*TokenEntry'. For the // purposes of tidying, it is sufficient if the token // entry only has ID set. @@ -1198,6 +1198,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData continue } + ts.logger.Trace("token_store: deleting accessor of the token with nil entry", "salted_accessor", saltedAccessor) // If deletion of accessor fails, move on to the next item since // this is just a best-effort operation. We do this last so that on // next run if something above failed we still have the accessor From b036478ac21288bc8bc7357f06c26d5e32b3c2b5 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Wed, 26 Apr 2017 17:29:04 -0400 Subject: [PATCH 08/41] Fix logging levels --- vault/expiration.go | 2 +- vault/token_store.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 43cfda63d728..000c5d734b1e 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -174,7 +174,7 @@ func (m *ExpirationManager) Tidy() error { } if revokeLease { - m.logger.Debug("expiration: lease is being revoked", "lease_id", leaseID) + m.logger.Debug("expiration: invalid lease is being revoked", "lease_id", leaseID) // Force the revocation and skip going through the token store // again err = m.revokeCommon(leaseID, true, true) diff --git a/vault/token_store.go b/vault/token_store.go index 95a5be7a856f..e83892d63afe 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1148,7 +1148,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // a valid one. If not, delete the leases associated with that token // and delete the accessor as well. for _, saltedAccessor := range saltedAccessorList { - ts.logger.Trace("token_store: checking if salted accessor is valid", "salted_accessor", saltedAccessor) + ts.logger.Trace("token_store: checking if accessor contains valid token", "salted_accessor", saltedAccessor) accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor) if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor entry: %v", err)) @@ -1159,7 +1159,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // in it. If not, it is an invalid accessor entry and needs to // be deleted. if accessorEntry.TokenID == "" { - ts.logger.Trace("token_store: deleting accessor with invalid token ID", "salted_accessor", saltedAccessor) + ts.logger.Debug("token_store: deleting accessor with invalid token ID", "salted_accessor", saltedAccessor) // If deletion of accessor fails, move on to the next // item since this is just a best-effort operation err = ts.view.Delete(accessorPrefix + saltedAccessor) @@ -1182,7 +1182,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // more and conclude that accessor, leases, and secondary index entries // for this token should not exist as well. if te == nil { - ts.logger.Trace("token_store: deleting token with nil entry", "salted_token", saltedId) + ts.logger.Debug("token_store: deleting token with nil entry", "salted_token", saltedId) // RevokeByToken expects a '*TokenEntry'. For the // purposes of tidying, it is sufficient if the token // entry only has ID set. @@ -1198,7 +1198,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData continue } - ts.logger.Trace("token_store: deleting accessor of the token with nil entry", "salted_accessor", saltedAccessor) + ts.logger.Debug("token_store: deleting accessor of the token with nil entry", "salted_accessor", saltedAccessor) // If deletion of accessor fails, move on to the next item since // this is just a best-effort operation. We do this last so that on // next run if something above failed we still have the accessor From 711153d74a1fe409f0f132352f3be6b64c342268 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Thu, 27 Apr 2017 10:56:19 -0400 Subject: [PATCH 09/41] Fix logging suggestions; put the policyStore nil check back in --- vault/expiration.go | 7 ++++++- vault/token_store.go | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 000c5d734b1e..7d354e18f68d 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -125,9 +125,13 @@ func (m *ExpirationManager) Tidy() error { // Create a cache to keep track of looked up tokens tokenCache := make(map[string]string) + i := 0 tidyFunc := func(leaseID string) { - m.logger.Trace("expiration: checking if lease is valid", "lease_id", leaseID) + i++ + if i%500 == 0 { + m.logger.Debug("expiration: tidying of leases", "progress", i) + } le, err := m.loadEntry(leaseID) if err != nil { @@ -186,6 +190,7 @@ func (m *ExpirationManager) Tidy() error { } if atomic.CompareAndSwapInt64(&m.tidyLock, 0, 1) { + m.logger.Trace("expiration: beginning tidy operation on leases") defer atomic.CompareAndSwapInt64(&m.tidyLock, 1, 0) if err := logical.ScanView(m.idView, tidyFunc); err != nil { return err diff --git a/vault/token_store.go b/vault/token_store.go index e83892d63afe..a027edf92109 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -108,10 +108,13 @@ func NewTokenStore(c *Core, config *logical.BackendConfig) (*TokenStore, error) view: view, cubbyholeDestroyer: destroyCubbyhole, logger: c.logger, - policyLookupFunc: c.policyStore.GetPolicy, tokenLocks: locksutil.CreateLocks(), } + if c.policyStore != nil { + t.policyLookupFunc = c.policyStore.GetPolicy + } + // Setup the framework endpoints t.Backend = &framework.Backend{ AuthRenew: t.authRenew, From 0d629ffd051d8026eaeb15428ea159ac6b5a6c8d Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Thu, 27 Apr 2017 11:08:11 -0400 Subject: [PATCH 10/41] Cache only valid tokens --- vault/expiration.go | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 7d354e18f68d..c921956fe380 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -124,7 +124,7 @@ func (m *ExpirationManager) Tidy() error { var tidyErrors *multierror.Error // Create a cache to keep track of looked up tokens - tokenCache := make(map[string]string) + validTokenCache := make(map[string]struct{}) i := 0 tidyFunc := func(leaseID string) { @@ -150,31 +150,26 @@ func (m *ExpirationManager) Tidy() error { revokeLease = true } - switch tokenCache[le.ClientToken] { - case "invalid": - m.logger.Debug("expiration: lease has an invalid token", "lease_id", leaseID) - revokeLease = true - case "valid": + if _, ok := validTokenCache[le.ClientToken]; ok { return - default: - saltedID := m.tokenStore.SaltID(le.ClientToken) - lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken) - lock.RLock() - te, err := m.tokenStore.lookupSalted(saltedID, true) - lock.RUnlock() + } - if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to lookup token: %v", err)) - return - } + saltedID := m.tokenStore.SaltID(le.ClientToken) + lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken) + lock.RLock() + te, err := m.tokenStore.lookupSalted(saltedID, true) + lock.RUnlock() - if te == nil { - m.logger.Debug("expiration: lease has an invalid token", "lease_id", leaseID) - revokeLease = true - tokenCache[le.ClientToken] = "invalid" - } else { - tokenCache[le.ClientToken] = "valid" - } + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to lookup token: %v", err)) + return + } + + if te == nil { + m.logger.Debug("expiration: lease has an invalid token", "lease_id", leaseID) + revokeLease = true + } else { + validTokenCache[le.ClientToken] = struct{}{} } if revokeLease { From 0c65cd422df4eef0402c5a3011af2a08bc302c0b Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Thu, 27 Apr 2017 11:20:36 -0400 Subject: [PATCH 11/41] Some more logging updates --- vault/expiration.go | 2 +- vault/token_store.go | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index c921956fe380..ae7d58c1ed2b 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -185,7 +185,7 @@ func (m *ExpirationManager) Tidy() error { } if atomic.CompareAndSwapInt64(&m.tidyLock, 0, 1) { - m.logger.Trace("expiration: beginning tidy operation on leases") + m.logger.Debug("expiration: beginning tidy operation on leases") defer atomic.CompareAndSwapInt64(&m.tidyLock, 1, 0) if err := logical.ScanView(m.idView, tidyFunc); err != nil { return err diff --git a/vault/token_store.go b/vault/token_store.go index a027edf92109..c6f5498853f1 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1122,6 +1122,8 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData return nil, fmt.Errorf("failed to fetch secondary index entries: %v", err) } + i := 0 + // Scan through the secondary index entries; if there is an entry // with the token's salt ID at the end, remove it for _, parent := range parentList { @@ -1132,7 +1134,11 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData } for _, child := range children { - ts.logger.Trace("token_store: checking if token is valid", "salted_token", child) + i++ + if i%500 == 0 { + ts.logger.Debug("token_store: checking validity of tokens in parent list", "progress", i) + } + // Look up tainted entries so we can be sure that if this isn't // found, it doesn't exist. Doing the following without locking // since appropriate locks cannot be held with salted token IDs. @@ -1147,11 +1153,16 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData } } + i = 0 // For each of the accessor, see if the token ID associated with it is // a valid one. If not, delete the leases associated with that token // and delete the accessor as well. for _, saltedAccessor := range saltedAccessorList { - ts.logger.Trace("token_store: checking if accessor contains valid token", "salted_accessor", saltedAccessor) + i++ + if i%500 == 0 { + ts.logger.Debug("token_store: checking if accessors contain valid tokens", "progress", i) + } + accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor) if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor entry: %v", err)) @@ -1162,7 +1173,8 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // in it. If not, it is an invalid accessor entry and needs to // be deleted. if accessorEntry.TokenID == "" { - ts.logger.Debug("token_store: deleting accessor with invalid token ID", "salted_accessor", saltedAccessor) + ts.logger.Debug("token_store: deleting accessor index with invalid token ID", "accessor_id", accessorEntry.AccessorID) + // If deletion of accessor fails, move on to the next // item since this is just a best-effort operation err = ts.view.Delete(accessorPrefix + saltedAccessor) @@ -1186,6 +1198,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // for this token should not exist as well. if te == nil { ts.logger.Debug("token_store: deleting token with nil entry", "salted_token", saltedId) + // RevokeByToken expects a '*TokenEntry'. For the // purposes of tidying, it is sufficient if the token // entry only has ID set. @@ -1201,7 +1214,8 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData continue } - ts.logger.Debug("token_store: deleting accessor of the token with nil entry", "salted_accessor", saltedAccessor) + ts.logger.Debug("token_store: deleting accessor index of the token with nil entry", "accessor_id", accessorEntry.AccessorID) + // If deletion of accessor fails, move on to the next item since // this is just a best-effort operation. We do this last so that on // next run if something above failed we still have the accessor From 3fdf38a58a990d4a9cd7d148c8bc3178e55fb4d3 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Thu, 27 Apr 2017 11:31:42 -0400 Subject: [PATCH 12/41] Distinguish valid and invalid tokens using bool value in cache --- vault/expiration.go | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index ae7d58c1ed2b..96617cb6b5c9 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -124,7 +124,7 @@ func (m *ExpirationManager) Tidy() error { var tidyErrors *multierror.Error // Create a cache to keep track of looked up tokens - validTokenCache := make(map[string]struct{}) + tokenCache := make(map[string]bool) i := 0 tidyFunc := func(leaseID string) { @@ -150,26 +150,33 @@ func (m *ExpirationManager) Tidy() error { revokeLease = true } - if _, ok := validTokenCache[le.ClientToken]; ok { - return - } + isValid, ok := tokenCache[le.ClientToken] + if !ok { + saltedID := m.tokenStore.SaltID(le.ClientToken) + lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken) + lock.RLock() + te, err := m.tokenStore.lookupSalted(saltedID, true) + lock.RUnlock() - saltedID := m.tokenStore.SaltID(le.ClientToken) - lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken) - lock.RLock() - te, err := m.tokenStore.lookupSalted(saltedID, true) - lock.RUnlock() - - if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to lookup token: %v", err)) - return - } + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to lookup token: %v", err)) + return + } - if te == nil { - m.logger.Debug("expiration: lease has an invalid token", "lease_id", leaseID) - revokeLease = true + if te == nil { + m.logger.Debug("expiration: lease has an invalid token", "lease_id", leaseID) + revokeLease = true + tokenCache[le.ClientToken] = false + } else { + tokenCache[le.ClientToken] = true + } } else { - validTokenCache[le.ClientToken] = struct{}{} + if isValid { + return + } else { + m.logger.Debug("expiration: lease has an invalid token", "lease_id", leaseID) + revokeLease = true + } } if revokeLease { From 98cdb6807f46f857a4cbc4afc4a1f8a0ca31f1d1 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Thu, 27 Apr 2017 11:41:33 -0400 Subject: [PATCH 13/41] Use an atomic lock for tidy operation in token store --- vault/token_store.go | 196 +++++++++++++++++++++++-------------------- 1 file changed, 103 insertions(+), 93 deletions(-) diff --git a/vault/token_store.go b/vault/token_store.go index c6f5498853f1..2e098015a989 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -3,6 +3,7 @@ package vault import ( "encoding/json" "fmt" + "sync/atomic" "regexp" "strings" @@ -95,6 +96,8 @@ type TokenStore struct { cubbyholeDestroyer func(*TokenStore, string) error logger log.Logger + + tidyLock int64 } // NewTokenStore is used to construct a token store that is @@ -1108,124 +1111,131 @@ func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEnt // handleTidy handles the cleaning up of leaked accessor storage entries and // cleaning up of leases that are associated to tokens that are expired. func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData) (*logical.Response, error) { - // List out all the accessors - saltedAccessorList, err := ts.view.List(accessorPrefix) - if err != nil { - return nil, fmt.Errorf("failed to fetch accessor entries: %v", err) - } - var tidyErrors *multierror.Error - // First, clean up secondary index entries that are no longer valid - parentList, err := ts.view.List(parentPrefix) - if err != nil { - return nil, fmt.Errorf("failed to fetch secondary index entries: %v", err) - } - - i := 0 + if atomic.CompareAndSwapInt64(&ts.tidyLock, 0, 1) { + ts.logger.Debug("token_store: beginning tidy operation on tokens") + defer atomic.CompareAndSwapInt64(&ts.tidyLock, 1, 0) + // List out all the accessors + saltedAccessorList, err := ts.view.List(accessorPrefix) + if err != nil { + return nil, fmt.Errorf("failed to fetch accessor entries: %v", err) + } - // Scan through the secondary index entries; if there is an entry - // with the token's salt ID at the end, remove it - for _, parent := range parentList { - children, err := ts.view.List(parentPrefix + parent + "/") + // First, clean up secondary index entries that are no longer valid + parentList, err := ts.view.List(parentPrefix) if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read child index entry: %v", err)) - continue + return nil, fmt.Errorf("failed to fetch secondary index entries: %v", err) } - for _, child := range children { - i++ - if i%500 == 0 { - ts.logger.Debug("token_store: checking validity of tokens in parent list", "progress", i) - } + i := 0 - // Look up tainted entries so we can be sure that if this isn't - // found, it doesn't exist. Doing the following without locking - // since appropriate locks cannot be held with salted token IDs. - te, _ := ts.lookupSalted(child, true) - if te == nil { - ts.logger.Debug("token_store: deleting invalid token", "salted_token", child) - err = ts.view.Delete(parentPrefix + parent + "/" + child) - if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err)) - } + // Scan through the secondary index entries; if there is an entry + // with the token's salt ID at the end, remove it + for _, parent := range parentList { + children, err := ts.view.List(parentPrefix + parent + "/") + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read child index entry: %v", err)) + continue } - } - } - i = 0 - // For each of the accessor, see if the token ID associated with it is - // a valid one. If not, delete the leases associated with that token - // and delete the accessor as well. - for _, saltedAccessor := range saltedAccessorList { - i++ - if i%500 == 0 { - ts.logger.Debug("token_store: checking if accessors contain valid tokens", "progress", i) - } + for _, child := range children { + i++ + if i%500 == 0 { + ts.logger.Debug("token_store: checking validity of tokens in parent list", "progress", i) + } - accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor) - if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor entry: %v", err)) - continue + // Look up tainted entries so we can be sure that if this isn't + // found, it doesn't exist. Doing the following without locking + // since appropriate locks cannot be held with salted token IDs. + te, _ := ts.lookupSalted(child, true) + if te == nil { + ts.logger.Debug("token_store: deleting invalid token", "salted_token", child) + err = ts.view.Delete(parentPrefix + parent + "/" + child) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err)) + } + } + } } - // A valid accessor storage entry should always have a token ID - // in it. If not, it is an invalid accessor entry and needs to - // be deleted. - if accessorEntry.TokenID == "" { - ts.logger.Debug("token_store: deleting accessor index with invalid token ID", "accessor_id", accessorEntry.AccessorID) + i = 0 + // For each of the accessor, see if the token ID associated with it is + // a valid one. If not, delete the leases associated with that token + // and delete the accessor as well. + for _, saltedAccessor := range saltedAccessorList { + i++ + if i%500 == 0 { + ts.logger.Debug("token_store: checking if accessors contain valid tokens", "progress", i) + } - // If deletion of accessor fails, move on to the next - // item since this is just a best-effort operation - err = ts.view.Delete(accessorPrefix + saltedAccessor) + accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor) if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete the accessor entry: %v", err)) + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor entry: %v", err)) continue } - } - lock := locksutil.LockForKey(ts.tokenLocks, accessorEntry.TokenID) - lock.RLock() + // A valid accessor storage entry should always have a token ID + // in it. If not, it is an invalid accessor entry and needs to + // be deleted. + if accessorEntry.TokenID == "" { + ts.logger.Debug("token_store: deleting accessor index with invalid token ID", "accessor_id", accessorEntry.AccessorID) - // Look up tainted variants so we only find entries that truly don't - // exist - saltedId := ts.SaltID(accessorEntry.TokenID) - te, err := ts.lookupSalted(saltedId, true) - lock.RUnlock() + // If deletion of accessor fails, move on to the next + // item since this is just a best-effort operation + err = ts.view.Delete(accessorPrefix + saltedAccessor) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete the accessor entry: %v", err)) + continue + } + } - // If token entry is not found assume that the token is not valid any - // more and conclude that accessor, leases, and secondary index entries - // for this token should not exist as well. - if te == nil { - ts.logger.Debug("token_store: deleting token with nil entry", "salted_token", saltedId) + lock := locksutil.LockForKey(ts.tokenLocks, accessorEntry.TokenID) + lock.RLock() - // RevokeByToken expects a '*TokenEntry'. For the - // purposes of tidying, it is sufficient if the token - // entry only has ID set. - tokenEntry := &TokenEntry{ - ID: accessorEntry.TokenID, - } + // Look up tainted variants so we only find entries that truly don't + // exist + saltedId := ts.SaltID(accessorEntry.TokenID) + te, err := ts.lookupSalted(saltedId, true) + lock.RUnlock() - // Attempt to revoke the token. This will also revoke - // the leases associated with the token. - err := ts.expiration.RevokeByToken(tokenEntry) - if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke leases of expired token: %v", err)) - continue - } + // If token entry is not found assume that the token is not valid any + // more and conclude that accessor, leases, and secondary index entries + // for this token should not exist as well. + if te == nil { + ts.logger.Debug("token_store: deleting token with nil entry", "salted_token", saltedId) - ts.logger.Debug("token_store: deleting accessor index of the token with nil entry", "accessor_id", accessorEntry.AccessorID) + // RevokeByToken expects a '*TokenEntry'. For the + // purposes of tidying, it is sufficient if the token + // entry only has ID set. + tokenEntry := &TokenEntry{ + ID: accessorEntry.TokenID, + } - // If deletion of accessor fails, move on to the next item since - // this is just a best-effort operation. We do this last so that on - // next run if something above failed we still have the accessor - // entry to try again. - err = ts.view.Delete(accessorPrefix + saltedAccessor) - if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete accessor entry: %v", err)) - continue + // Attempt to revoke the token. This will also revoke + // the leases associated with the token. + err := ts.expiration.RevokeByToken(tokenEntry) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke leases of expired token: %v", err)) + continue + } + + ts.logger.Debug("token_store: deleting accessor index of the token with nil entry", "accessor_id", accessorEntry.AccessorID) + + // If deletion of accessor fails, move on to the next item since + // this is just a best-effort operation. We do this last so that on + // next run if something above failed we still have the accessor + // entry to try again. + err = ts.view.Delete(accessorPrefix + saltedAccessor) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete accessor entry: %v", err)) + continue + } } } + } else { + ts.logger.Debug("token_store: tidy operation on tokens is already in progress") + return nil, fmt.Errorf("tidy operation on tokens is already in progress") } // Later request handling code seems to check if the type is multierror so From 2ef62fe592ac5548dd5032f38dd683450788714e Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Thu, 27 Apr 2017 13:48:29 -0400 Subject: [PATCH 14/41] refactor lock handling in token tidy function --- vault/expiration.go | 10 +-- vault/token_store.go | 207 +++++++++++++++++++++---------------------- 2 files changed, 103 insertions(+), 114 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 96617cb6b5c9..1c37cd93acef 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -130,7 +130,7 @@ func (m *ExpirationManager) Tidy() error { tidyFunc := func(leaseID string) { i++ if i%500 == 0 { - m.logger.Debug("expiration: tidying of leases", "progress", i) + m.logger.Debug("expiration: tidying leases", "progress", i) } le, err := m.loadEntry(leaseID) @@ -202,13 +202,7 @@ func (m *ExpirationManager) Tidy() error { return fmt.Errorf("tidy operation on leases is already in progress") } - // If no errors were encountered, return a normal error instead of a - // multierror - if tidyErrors == nil { - return nil - } - - return tidyErrors + return tidyErrors.ErrorOrNil() } // Restore is used to recover the lease states when starting. diff --git a/vault/token_store.go b/vault/token_store.go index 2e098015a989..8e0cd46ae63a 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1113,138 +1113,133 @@ func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEnt func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData) (*logical.Response, error) { var tidyErrors *multierror.Error - if atomic.CompareAndSwapInt64(&ts.tidyLock, 0, 1) { - ts.logger.Debug("token_store: beginning tidy operation on tokens") - defer atomic.CompareAndSwapInt64(&ts.tidyLock, 1, 0) - // List out all the accessors - saltedAccessorList, err := ts.view.List(accessorPrefix) - if err != nil { - return nil, fmt.Errorf("failed to fetch accessor entries: %v", err) - } + if !atomic.CompareAndSwapInt64(&ts.tidyLock, 0, 1) { + ts.logger.Debug("token_store: tidy operation on tokens is already in progress") + return nil, fmt.Errorf("tidy operation on tokens is already in progress") + } - // First, clean up secondary index entries that are no longer valid - parentList, err := ts.view.List(parentPrefix) - if err != nil { - return nil, fmt.Errorf("failed to fetch secondary index entries: %v", err) - } + defer atomic.CompareAndSwapInt64(&ts.tidyLock, 1, 0) - i := 0 + ts.logger.Debug("token_store: beginning tidy operation on tokens") + // List out all the accessors + saltedAccessorList, err := ts.view.List(accessorPrefix) + if err != nil { + return nil, fmt.Errorf("failed to fetch accessor entries: %v", err) + } - // Scan through the secondary index entries; if there is an entry - // with the token's salt ID at the end, remove it - for _, parent := range parentList { - children, err := ts.view.List(parentPrefix + parent + "/") - if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read child index entry: %v", err)) - continue - } + // First, clean up secondary index entries that are no longer valid + parentList, err := ts.view.List(parentPrefix) + if err != nil { + return nil, fmt.Errorf("failed to fetch secondary index entries: %v", err) + } - for _, child := range children { - i++ - if i%500 == 0 { - ts.logger.Debug("token_store: checking validity of tokens in parent list", "progress", i) - } + i := 0 - // Look up tainted entries so we can be sure that if this isn't - // found, it doesn't exist. Doing the following without locking - // since appropriate locks cannot be held with salted token IDs. - te, _ := ts.lookupSalted(child, true) - if te == nil { - ts.logger.Debug("token_store: deleting invalid token", "salted_token", child) - err = ts.view.Delete(parentPrefix + parent + "/" + child) - if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err)) - } - } - } + // Scan through the secondary index entries; if there is an entry + // with the token's salt ID at the end, remove it + for _, parent := range parentList { + children, err := ts.view.List(parentPrefix + parent + "/") + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read child index entry: %v", err)) + continue } - i = 0 - // For each of the accessor, see if the token ID associated with it is - // a valid one. If not, delete the leases associated with that token - // and delete the accessor as well. - for _, saltedAccessor := range saltedAccessorList { + for _, child := range children { i++ if i%500 == 0 { - ts.logger.Debug("token_store: checking if accessors contain valid tokens", "progress", i) + ts.logger.Debug("token_store: checking validity of tokens in parent list", "progress", i) } - accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor) - if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor entry: %v", err)) - continue + // Look up tainted entries so we can be sure that if this isn't + // found, it doesn't exist. Doing the following without locking + // since appropriate locks cannot be held with salted token IDs. + te, _ := ts.lookupSalted(child, true) + if te == nil { + ts.logger.Debug("token_store: deleting invalid token", "salted_token", child) + err = ts.view.Delete(parentPrefix + parent + "/" + child) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err)) + } } + } + } - // A valid accessor storage entry should always have a token ID - // in it. If not, it is an invalid accessor entry and needs to - // be deleted. - if accessorEntry.TokenID == "" { - ts.logger.Debug("token_store: deleting accessor index with invalid token ID", "accessor_id", accessorEntry.AccessorID) + i = 0 + // For each of the accessor, see if the token ID associated with it is + // a valid one. If not, delete the leases associated with that token + // and delete the accessor as well. + for _, saltedAccessor := range saltedAccessorList { + i++ + if i%500 == 0 { + ts.logger.Debug("token_store: checking if accessors contain valid tokens", "progress", i) + } - // If deletion of accessor fails, move on to the next - // item since this is just a best-effort operation - err = ts.view.Delete(accessorPrefix + saltedAccessor) - if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete the accessor entry: %v", err)) - continue - } + accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor entry: %v", err)) + continue + } + + // A valid accessor storage entry should always have a token ID + // in it. If not, it is an invalid accessor entry and needs to + // be deleted. + if accessorEntry.TokenID == "" { + ts.logger.Debug("token_store: deleting accessor index with invalid token ID", "accessor_id", accessorEntry.AccessorID) + + // If deletion of accessor fails, move on to the next + // item since this is just a best-effort operation + err = ts.view.Delete(accessorPrefix + saltedAccessor) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete the accessor entry: %v", err)) + continue } + } - lock := locksutil.LockForKey(ts.tokenLocks, accessorEntry.TokenID) - lock.RLock() + lock := locksutil.LockForKey(ts.tokenLocks, accessorEntry.TokenID) + lock.RLock() - // Look up tainted variants so we only find entries that truly don't - // exist - saltedId := ts.SaltID(accessorEntry.TokenID) - te, err := ts.lookupSalted(saltedId, true) - lock.RUnlock() + // Look up tainted variants so we only find entries that truly don't + // exist + saltedId := ts.SaltID(accessorEntry.TokenID) + te, err := ts.lookupSalted(saltedId, true) + lock.RUnlock() - // If token entry is not found assume that the token is not valid any - // more and conclude that accessor, leases, and secondary index entries - // for this token should not exist as well. - if te == nil { - ts.logger.Debug("token_store: deleting token with nil entry", "salted_token", saltedId) + // If token entry is not found assume that the token is not valid any + // more and conclude that accessor, leases, and secondary index entries + // for this token should not exist as well. + if te == nil { + ts.logger.Debug("token_store: deleting token with nil entry", "salted_token", saltedId) - // RevokeByToken expects a '*TokenEntry'. For the - // purposes of tidying, it is sufficient if the token - // entry only has ID set. - tokenEntry := &TokenEntry{ - ID: accessorEntry.TokenID, - } + // RevokeByToken expects a '*TokenEntry'. For the + // purposes of tidying, it is sufficient if the token + // entry only has ID set. + tokenEntry := &TokenEntry{ + ID: accessorEntry.TokenID, + } - // Attempt to revoke the token. This will also revoke - // the leases associated with the token. - err := ts.expiration.RevokeByToken(tokenEntry) - if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke leases of expired token: %v", err)) - continue - } + // Attempt to revoke the token. This will also revoke + // the leases associated with the token. + err := ts.expiration.RevokeByToken(tokenEntry) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke leases of expired token: %v", err)) + continue + } - ts.logger.Debug("token_store: deleting accessor index of the token with nil entry", "accessor_id", accessorEntry.AccessorID) + ts.logger.Debug("token_store: deleting accessor index of the token with nil entry", "accessor_id", accessorEntry.AccessorID) - // If deletion of accessor fails, move on to the next item since - // this is just a best-effort operation. We do this last so that on - // next run if something above failed we still have the accessor - // entry to try again. - err = ts.view.Delete(accessorPrefix + saltedAccessor) - if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete accessor entry: %v", err)) - continue - } + // If deletion of accessor fails, move on to the next item since + // this is just a best-effort operation. We do this last so that on + // next run if something above failed we still have the accessor + // entry to try again. + err = ts.view.Delete(accessorPrefix + saltedAccessor) + if err != nil { + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete accessor entry: %v", err)) + continue } } - } else { - ts.logger.Debug("token_store: tidy operation on tokens is already in progress") - return nil, fmt.Errorf("tidy operation on tokens is already in progress") - } - - // Later request handling code seems to check if the type is multierror so - // if we haven't added any errors we need to just return a normal nil error - if tidyErrors == nil { - return nil, nil } - return nil, tidyErrors + return nil, tidyErrors.ErrorOrNil() } // handleUpdateLookupAccessor handles the auth/token/lookup-accessor path for returning From a8ef2c013eece04d799f130488a58febfd54c3c1 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Thu, 27 Apr 2017 16:22:19 -0400 Subject: [PATCH 15/41] Refactor locking code in lease tidy; add ending debug statements --- vault/expiration.go | 22 +++++++++++++--------- vault/token_store.go | 2 ++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 1c37cd93acef..f87176032d81 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -123,6 +123,15 @@ func (c *Core) stopExpiration() error { func (m *ExpirationManager) Tidy() error { var tidyErrors *multierror.Error + if !atomic.CompareAndSwapInt64(&m.tidyLock, 0, 1) { + m.logger.Debug("expiration: tidy operation on leases is already in progress") + return fmt.Errorf("tidy operation on leases is already in progress") + } + + defer atomic.CompareAndSwapInt64(&m.tidyLock, 1, 0) + + m.logger.Debug("expiration: beginning tidy operation on leases") + // Create a cache to keep track of looked up tokens tokenCache := make(map[string]bool) i := 0 @@ -191,17 +200,12 @@ func (m *ExpirationManager) Tidy() error { } } - if atomic.CompareAndSwapInt64(&m.tidyLock, 0, 1) { - m.logger.Debug("expiration: beginning tidy operation on leases") - defer atomic.CompareAndSwapInt64(&m.tidyLock, 1, 0) - if err := logical.ScanView(m.idView, tidyFunc); err != nil { - return err - } - } else { - m.logger.Debug("expiration: tidy operation on leases is already in progress") - return fmt.Errorf("tidy operation on leases is already in progress") + if err := logical.ScanView(m.idView, tidyFunc); err != nil { + return err } + m.logger.Debug("expiration: ending tidy operation on leases") + return tidyErrors.ErrorOrNil() } diff --git a/vault/token_store.go b/vault/token_store.go index 8e0cd46ae63a..69df9f3d471a 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1239,6 +1239,8 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData } } + ts.logger.Debug("token_store: ending tidy operation on tokens") + return nil, tidyErrors.ErrorOrNil() } From 8c7b1754fc1c27b368d218255c634601d1490652 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Tue, 2 May 2017 16:53:41 -0400 Subject: [PATCH 16/41] Skip checking the validity of an empty client token --- vault/expiration.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vault/expiration.go b/vault/expiration.go index f87176032d81..0b4ee1090e33 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -153,13 +153,16 @@ func (m *ExpirationManager) Tidy() error { return } + var isValid bool + var ok bool revokeLease := false if le.ClientToken == "" { m.logger.Debug("expiration: lease has an empty token", "lease_id", leaseID) revokeLease = true + goto REVOKE_CHECK } - isValid, ok := tokenCache[le.ClientToken] + isValid, ok = tokenCache[le.ClientToken] if !ok { saltedID := m.tokenStore.SaltID(le.ClientToken) lock := locksutil.LockForKey(m.tokenStore.tokenLocks, le.ClientToken) @@ -188,6 +191,7 @@ func (m *ExpirationManager) Tidy() error { } } + REVOKE_CHECK: if revokeLease { m.logger.Debug("expiration: invalid lease is being revoked", "lease_id", leaseID) // Force the revocation and skip going through the token store From 853233aa12b6c213aca51d95c56dc2b71eb2cd92 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Tue, 2 May 2017 16:54:03 -0400 Subject: [PATCH 17/41] Added a test for tidying of empty token --- vault/expiration_test.go | 55 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/vault/expiration_test.go b/vault/expiration_test.go index b8255b3e9d6b..d0e09e06da8f 100644 --- a/vault/expiration_test.go +++ b/vault/expiration_test.go @@ -33,6 +33,61 @@ func mockBackendExpiration(t testing.TB, backend physical.Backend) (*Core, *Expi return c, ts.expiration } +func TestExpiration_Tidy(t *testing.T) { + exp := mockExpiration(t) + + // Set up a count function to calculate number of leases + count := 0 + countFunc := func(leaseID string) { + count++ + } + + // Scan the storage with the count func set + if err := logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Check that there are no leases to begin with + if count != 0 { + t.Fatalf("bad: lease count; expected:0 actual:%d", count) + } + + // Create a lease entry without a client token in it + le := &leaseEntry{ + LeaseID: "lease/with/no/client/token", + Path: "foo/bar", + } + + // Persist the invalid lease entry + if err := exp.persistEntry(le); err != nil { + t.Fatalf("error persisting entry: %v", err) + } + + count = 0 + if err := logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Check that the storage was successful and that the count of leases is + // now 1 + if count != 1 { + t.Fatalf("bad: lease count; expected:1 actual:%d", count) + } + + // Run the tidy operation + err := exp.Tidy() + + count = 0 + if err := logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Post the tidy operation, the invalid lease entry should have been gone + if count != 0 { + t.Fatalf("bad: lease count; expected:0 actual:%d", count) + } +} + func BenchmarkExpiration_Restore_Etcd(b *testing.B) { addr := os.Getenv("PHYSICAL_BACKEND_BENCHMARK_ADDR") randPath := fmt.Sprintf("vault-%d/", time.Now().Unix()) From d07d3cbc9a8381777b30f1e83207c035562860c9 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Tue, 2 May 2017 17:11:35 -0400 Subject: [PATCH 18/41] Added steps to check if invalid token is properly cleaned up --- vault/expiration.go | 3 +-- vault/expiration_test.go | 37 +++++++++++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 0b4ee1090e33..f6ca1211c98d 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -153,8 +153,7 @@ func (m *ExpirationManager) Tidy() error { return } - var isValid bool - var ok bool + var isValid, ok bool revokeLease := false if le.ClientToken == "" { m.logger.Debug("expiration: lease has an empty token", "lease_id", leaseID) diff --git a/vault/expiration_test.go b/vault/expiration_test.go index d0e09e06da8f..131092853d05 100644 --- a/vault/expiration_test.go +++ b/vault/expiration_test.go @@ -34,6 +34,8 @@ func mockBackendExpiration(t testing.TB, backend physical.Backend) (*Core, *Expi } func TestExpiration_Tidy(t *testing.T) { + var err error + exp := mockExpiration(t) // Set up a count function to calculate number of leases @@ -43,7 +45,7 @@ func TestExpiration_Tidy(t *testing.T) { } // Scan the storage with the count func set - if err := logical.ScanView(exp.idView, countFunc); err != nil { + if err = logical.ScanView(exp.idView, countFunc); err != nil { t.Fatal(err) } @@ -59,12 +61,12 @@ func TestExpiration_Tidy(t *testing.T) { } // Persist the invalid lease entry - if err := exp.persistEntry(le); err != nil { + if err = exp.persistEntry(le); err != nil { t.Fatalf("error persisting entry: %v", err) } count = 0 - if err := logical.ScanView(exp.idView, countFunc); err != nil { + if err = logical.ScanView(exp.idView, countFunc); err != nil { t.Fatal(err) } @@ -75,7 +77,7 @@ func TestExpiration_Tidy(t *testing.T) { } // Run the tidy operation - err := exp.Tidy() + err = exp.Tidy() count = 0 if err := logical.ScanView(exp.idView, countFunc); err != nil { @@ -86,6 +88,33 @@ func TestExpiration_Tidy(t *testing.T) { if count != 0 { t.Fatalf("bad: lease count; expected:0 actual:%d", count) } + + // Set a revoked/invalid token in the lease entry + le.ClientToken = "invalidtoken" + + // Persist the invalid lease entry + if err = exp.persistEntry(le); err != nil { + t.Fatalf("error persisting entry: %v", err) + } + + count = 0 + if err = logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Check that the storage was successful and that the count of leases is + // now 1 + if count != 1 { + t.Fatalf("bad: lease count; expected:1 actual:%d", count) + } + + // Run the tidy operation + err = exp.Tidy() + + count = 0 + if err = logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } } func BenchmarkExpiration_Restore_Etcd(b *testing.B) { From 79fc0d8d44a53f1e475bf66719a7feec7f7a43d1 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Tue, 2 May 2017 17:15:26 -0400 Subject: [PATCH 19/41] Check if multiple leases with same invalid token is getting cleaned up --- vault/expiration_test.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/vault/expiration_test.go b/vault/expiration_test.go index 131092853d05..79a93bf04cb0 100644 --- a/vault/expiration_test.go +++ b/vault/expiration_test.go @@ -115,6 +115,24 @@ func TestExpiration_Tidy(t *testing.T) { if err = logical.ScanView(exp.idView, countFunc); err != nil { t.Fatal(err) } + + // Attach an invalid token with 2 leases + if err = exp.persistEntry(le); err != nil { + t.Fatalf("error persisting entry: %v", err) + } + + le.LeaseID = "another/invalid/lease" + if err = exp.persistEntry(le); err != nil { + t.Fatalf("error persisting entry: %v", err) + } + + // Run the tidy operation + err = exp.Tidy() + + count = 0 + if err = logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } } func BenchmarkExpiration_Restore_Etcd(b *testing.B) { From 497bebe403a39dc774444ebae92efdaa3e0b24f2 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Tue, 2 May 2017 17:56:15 -0400 Subject: [PATCH 20/41] Do not duplicate log lines for invalid leases --- vault/expiration.go | 7 ++-- vault/expiration_test.go | 79 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index f6ca1211c98d..52e8f6d41a9e 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -156,7 +156,7 @@ func (m *ExpirationManager) Tidy() error { var isValid, ok bool revokeLease := false if le.ClientToken == "" { - m.logger.Debug("expiration: lease has an empty token", "lease_id", leaseID) + m.logger.Debug("expiration: revoking lease which has an empty token", "lease_id", leaseID) revokeLease = true goto REVOKE_CHECK } @@ -175,7 +175,7 @@ func (m *ExpirationManager) Tidy() error { } if te == nil { - m.logger.Debug("expiration: lease has an invalid token", "lease_id", leaseID) + m.logger.Debug("expiration: revoking lease which holds an invalid token", "lease_id", leaseID) revokeLease = true tokenCache[le.ClientToken] = false } else { @@ -185,14 +185,13 @@ func (m *ExpirationManager) Tidy() error { if isValid { return } else { - m.logger.Debug("expiration: lease has an invalid token", "lease_id", leaseID) + m.logger.Debug("expiration: revoking lease which contains an invalid token", "lease_id", leaseID) revokeLease = true } } REVOKE_CHECK: if revokeLease { - m.logger.Debug("expiration: invalid lease is being revoked", "lease_id", leaseID) // Force the revocation and skip going through the token store // again err = m.revokeCommon(leaseID, true, true) diff --git a/vault/expiration_test.go b/vault/expiration_test.go index 79a93bf04cb0..2b685acf4c5b 100644 --- a/vault/expiration_test.go +++ b/vault/expiration_test.go @@ -78,6 +78,9 @@ func TestExpiration_Tidy(t *testing.T) { // Run the tidy operation err = exp.Tidy() + if err != nil { + t.Fatal(err) + } count = 0 if err := logical.ScanView(exp.idView, countFunc); err != nil { @@ -110,12 +113,20 @@ func TestExpiration_Tidy(t *testing.T) { // Run the tidy operation err = exp.Tidy() + if err != nil { + t.Fatal(err) + } count = 0 if err = logical.ScanView(exp.idView, countFunc); err != nil { t.Fatal(err) } + // Post the tidy operation, the invalid lease entry should have been gone + if count != 0 { + t.Fatalf("bad: lease count; expected:0 actual:%d", count) + } + // Attach an invalid token with 2 leases if err = exp.persistEntry(le); err != nil { t.Fatalf("error persisting entry: %v", err) @@ -128,11 +139,79 @@ func TestExpiration_Tidy(t *testing.T) { // Run the tidy operation err = exp.Tidy() + if err != nil { + t.Fatal(err) + } count = 0 if err = logical.ScanView(exp.idView, countFunc); err != nil { t.Fatal(err) } + + // Post the tidy operation, the invalid lease entry should have been gone + if count != 0 { + t.Fatalf("bad: lease count; expected:0 actual:%d", count) + } + + for i := 0; i < 1000; i++ { + req := &logical.Request{ + Operation: logical.ReadOperation, + Path: "invalid/lease/" + fmt.Sprintf("%d", i+1), + } + resp := &logical.Response{ + Secret: &logical.Secret{ + LeaseOptions: logical.LeaseOptions{ + TTL: 100 * time.Millisecond, + }, + }, + Data: map[string]interface{}{ + "test_key": "test_value", + }, + } + _, err := exp.Register(req, resp) + if err != nil { + t.Fatalf("err: %v", err) + } + } + + count = 0 + if err = logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Check that there are 1000 leases now + if count != 1000 { + t.Fatalf("bad: lease count; expected:1000 actual:%d", count) + } + + errCh1 := make(chan error) + errCh2 := make(chan error) + + // Initiate tidy of the above 1000 invalid leases in quick succession. Only + // one tidy operation can be in flight at any time. One of these requests + // should error out. + go func() { + errCh1 <- exp.Tidy() + }() + + go func() { + errCh2 <- exp.Tidy() + }() + + var err1, err2 error + + for i := 0; i < 2; i++ { + select { + case err1 = <-errCh1: + fmt.Printf("err1 received: %#v\n", err1) + case err2 = <-errCh2: + fmt.Printf("err2 received: %#v\n", err2) + } + } + + if err1 == nil && err2 == nil { + t.Fatal("expected error") + } } func BenchmarkExpiration_Restore_Etcd(b *testing.B) { From aa08e5ca4cfef8f151d8185f2c31cbb72c471f91 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Tue, 2 May 2017 18:06:59 -0400 Subject: [PATCH 21/41] Added test to check the atomicity of the lease tidy operation --- vault/expiration_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vault/expiration_test.go b/vault/expiration_test.go index 2b685acf4c5b..4a51738a619a 100644 --- a/vault/expiration_test.go +++ b/vault/expiration_test.go @@ -155,8 +155,9 @@ func TestExpiration_Tidy(t *testing.T) { for i := 0; i < 1000; i++ { req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "invalid/lease/" + fmt.Sprintf("%d", i+1), + Operation: logical.ReadOperation, + Path: "invalid/lease/" + fmt.Sprintf("%d", i+1), + ClientToken: "invalidtoken", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -203,14 +204,13 @@ func TestExpiration_Tidy(t *testing.T) { for i := 0; i < 2; i++ { select { case err1 = <-errCh1: - fmt.Printf("err1 received: %#v\n", err1) case err2 = <-errCh2: - fmt.Printf("err2 received: %#v\n", err2) } } - if err1 == nil && err2 == nil { - t.Fatal("expected error") + if !(err1 != nil && err1.Error() == "tidy operation on leases is already in progress") && + !(err2 != nil && err2.Error() == "tidy operation on leases is already in progress") { + t.Fatal("expected at least one of err1 or err2 to be set; err1: %#v\n err2:%#v\n", err1, err2) } } From a3c2a420e823b55f70efa0f8516069aaf0d7252c Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Tue, 2 May 2017 18:12:03 -0400 Subject: [PATCH 22/41] Test to check that leases with valid tokens are not being cleaned up --- vault/expiration_test.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/vault/expiration_test.go b/vault/expiration_test.go index 4a51738a619a..5676cfaca9d4 100644 --- a/vault/expiration_test.go +++ b/vault/expiration_test.go @@ -212,6 +212,24 @@ func TestExpiration_Tidy(t *testing.T) { !(err2 != nil && err2.Error() == "tidy operation on leases is already in progress") { t.Fatal("expected at least one of err1 or err2 to be set; err1: %#v\n err2:%#v\n", err1, err2) } + + root, err := exp.tokenStore.rootToken() + le.ClientToken = root.ID + + // Attach a valid token with the leases + if err = exp.persistEntry(le); err != nil { + t.Fatalf("error persisting entry: %v", err) + } + + count = 0 + if err = logical.ScanView(exp.idView, countFunc); err != nil { + t.Fatal(err) + } + + // Post the tidy operation, the valid lease entry should not get affected + if count != 1 { + t.Fatalf("bad: lease count; expected:1 actual:%d", count) + } } func BenchmarkExpiration_Restore_Etcd(b *testing.B) { From b6843ec57d0fdf734c62a2d2a6137acd2f7f5cf6 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Wed, 3 May 2017 10:54:07 -0400 Subject: [PATCH 23/41] Added summary logs to help better understand the consequence --- vault/expiration.go | 17 +++++++++++++---- vault/token_store.go | 32 ++++++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 52e8f6d41a9e..4e970bbcdb9c 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -134,12 +134,14 @@ func (m *ExpirationManager) Tidy() error { // Create a cache to keep track of looked up tokens tokenCache := make(map[string]bool) - i := 0 + countLease := 0 + deletedCountEmptyToken := 0 + deletedCountInvalidToken := 0 tidyFunc := func(leaseID string) { - i++ - if i%500 == 0 { - m.logger.Debug("expiration: tidying leases", "progress", i) + countLease++ + if countLease%500 == 0 { + m.logger.Debug("expiration: tidying leases", "progress", countLease) } le, err := m.loadEntry(leaseID) @@ -158,6 +160,7 @@ func (m *ExpirationManager) Tidy() error { if le.ClientToken == "" { m.logger.Debug("expiration: revoking lease which has an empty token", "lease_id", leaseID) revokeLease = true + deletedCountEmptyToken++ goto REVOKE_CHECK } @@ -177,6 +180,7 @@ func (m *ExpirationManager) Tidy() error { if te == nil { m.logger.Debug("expiration: revoking lease which holds an invalid token", "lease_id", leaseID) revokeLease = true + deletedCountInvalidToken++ tokenCache[le.ClientToken] = false } else { tokenCache[le.ClientToken] = true @@ -186,6 +190,7 @@ func (m *ExpirationManager) Tidy() error { return } else { m.logger.Debug("expiration: revoking lease which contains an invalid token", "lease_id", leaseID) + deletedCountInvalidToken++ revokeLease = true } } @@ -208,6 +213,10 @@ func (m *ExpirationManager) Tidy() error { m.logger.Debug("expiration: ending tidy operation on leases") + m.logger.Debug("expiration: number of leases scanned", "total", countLease) + m.logger.Debug("expiration: number of revoked leases which had empty tokens", "deleted_count", deletedCountEmptyToken) + m.logger.Debug("expiration: number of revoked leases which had invalid tokens", "deleted_count", deletedCountInvalidToken) + return tidyErrors.ErrorOrNil() } diff --git a/vault/token_store.go b/vault/token_store.go index 69df9f3d471a..a8e6b321f2ea 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1133,7 +1133,8 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData return nil, fmt.Errorf("failed to fetch secondary index entries: %v", err) } - i := 0 + countParentList := 0 + deletedCountParentList := 0 // Scan through the secondary index entries; if there is an entry // with the token's salt ID at the end, remove it @@ -1145,9 +1146,9 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData } for _, child := range children { - i++ - if i%500 == 0 { - ts.logger.Debug("token_store: checking validity of tokens in parent list", "progress", i) + countParentList++ + if countParentList%500 == 0 { + ts.logger.Debug("token_store: checking validity of tokens in parent list", "progress", countParentList) } // Look up tainted entries so we can be sure that if this isn't @@ -1160,18 +1161,23 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err)) } + deletedCountParentList++ } } } - i = 0 + countAccessorList := 0 + deletedCountAccessorEmptyToken := 0 + deletedCountAccessorInvalidToken := 0 + deletedCountInvalidTokenInAccessor := 0 + // For each of the accessor, see if the token ID associated with it is // a valid one. If not, delete the leases associated with that token // and delete the accessor as well. for _, saltedAccessor := range saltedAccessorList { - i++ - if i%500 == 0 { - ts.logger.Debug("token_store: checking if accessors contain valid tokens", "progress", i) + countAccessorList++ + if countAccessorList%500 == 0 { + ts.logger.Debug("token_store: checking if accessors contain valid tokens", "progress", countAccessorList) } accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor) @@ -1193,6 +1199,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete the accessor entry: %v", err)) continue } + deletedCountAccessorEmptyToken++ } lock := locksutil.LockForKey(ts.tokenLocks, accessorEntry.TokenID) @@ -1224,6 +1231,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke leases of expired token: %v", err)) continue } + deletedCountInvalidTokenInAccessor++ ts.logger.Debug("token_store: deleting accessor index of the token with nil entry", "accessor_id", accessorEntry.AccessorID) @@ -1236,11 +1244,19 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete accessor entry: %v", err)) continue } + deletedCountAccessorInvalidToken++ } } ts.logger.Debug("token_store: ending tidy operation on tokens") + ts.logger.Debug("token_store: number of tokens scanned in parent list", "total", countParentList) + ts.logger.Debug("token_store: number of tokens revoked in parent list", "deleted_count", deletedCountParentList) + ts.logger.Debug("token_store: number of accessor scanned", "total", countAccessorList) + ts.logger.Debug("token_store: number of deleted accessors which had empty tokens", "deleted_count", deletedCountAccessorEmptyToken) + ts.logger.Debug("token_store: number of revoked tokens which were invalid but present in accessors", "deleted_count", deletedCountInvalidTokenInAccessor) + ts.logger.Debug("token_store: number of deleted accessors which had invalid tokens", "deleted_count", deletedCountAccessorInvalidToken) + return nil, tidyErrors.ErrorOrNil() } From 0c025405d8659a6260ca3cec38e1c660c14e6e1b Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Wed, 3 May 2017 11:15:59 -0400 Subject: [PATCH 24/41] Less scary debugging --- vault/token_store.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vault/token_store.go b/vault/token_store.go index a8e6b321f2ea..0c2bdbea3b77 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1156,7 +1156,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // since appropriate locks cannot be held with salted token IDs. te, _ := ts.lookupSalted(child, true) if te == nil { - ts.logger.Debug("token_store: deleting invalid token", "salted_token", child) + ts.logger.Debug("token_store: deleting invalid secondary index entry", "salted_token", child) err = ts.view.Delete(parentPrefix + parent + "/" + child) if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err)) From b3c6a5651319238e93615a8a3a9136ba2737d00c Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Wed, 3 May 2017 12:14:58 -0400 Subject: [PATCH 25/41] change some logging output --- vault/token_store.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/vault/token_store.go b/vault/token_store.go index 0c2bdbea3b77..9521f1f9e4f9 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1114,13 +1114,13 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData var tidyErrors *multierror.Error if !atomic.CompareAndSwapInt64(&ts.tidyLock, 0, 1) { - ts.logger.Debug("token_store: tidy operation on tokens is already in progress") + ts.logger.Debug("token: tidy operation on tokens is already in progress") return nil, fmt.Errorf("tidy operation on tokens is already in progress") } defer atomic.CompareAndSwapInt64(&ts.tidyLock, 1, 0) - ts.logger.Debug("token_store: beginning tidy operation on tokens") + ts.logger.Debug("token: beginning tidy operation on tokens") // List out all the accessors saltedAccessorList, err := ts.view.List(accessorPrefix) if err != nil { @@ -1148,7 +1148,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData for _, child := range children { countParentList++ if countParentList%500 == 0 { - ts.logger.Debug("token_store: checking validity of tokens in parent list", "progress", countParentList) + ts.logger.Debug("token: checking validity of tokens in parent list", "progress", countParentList) } // Look up tainted entries so we can be sure that if this isn't @@ -1156,7 +1156,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // since appropriate locks cannot be held with salted token IDs. te, _ := ts.lookupSalted(child, true) if te == nil { - ts.logger.Debug("token_store: deleting invalid secondary index entry", "salted_token", child) + ts.logger.Debug("token: deleting invalid secondary index entry", "salted_token", child) err = ts.view.Delete(parentPrefix + parent + "/" + child) if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err)) @@ -1177,7 +1177,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData for _, saltedAccessor := range saltedAccessorList { countAccessorList++ if countAccessorList%500 == 0 { - ts.logger.Debug("token_store: checking if accessors contain valid tokens", "progress", countAccessorList) + ts.logger.Debug("token: checking if accessors contain valid tokens", "progress", countAccessorList) } accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor) @@ -1190,7 +1190,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // in it. If not, it is an invalid accessor entry and needs to // be deleted. if accessorEntry.TokenID == "" { - ts.logger.Debug("token_store: deleting accessor index with invalid token ID", "accessor_id", accessorEntry.AccessorID) + ts.logger.Debug("token: deleting accessor index with invalid token ID", "accessor_id", accessorEntry.AccessorID) // If deletion of accessor fails, move on to the next // item since this is just a best-effort operation @@ -1215,7 +1215,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // more and conclude that accessor, leases, and secondary index entries // for this token should not exist as well. if te == nil { - ts.logger.Debug("token_store: deleting token with nil entry", "salted_token", saltedId) + ts.logger.Debug("token: deleting token with nil entry", "salted_token", saltedId) // RevokeByToken expects a '*TokenEntry'. For the // purposes of tidying, it is sufficient if the token @@ -1233,7 +1233,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData } deletedCountInvalidTokenInAccessor++ - ts.logger.Debug("token_store: deleting accessor index of the token with nil entry", "accessor_id", accessorEntry.AccessorID) + ts.logger.Debug("token: deleting accessor index of the token with nil entry", "accessor_id", accessorEntry.AccessorID) // If deletion of accessor fails, move on to the next item since // this is just a best-effort operation. We do this last so that on @@ -1248,14 +1248,14 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData } } - ts.logger.Debug("token_store: ending tidy operation on tokens") + ts.logger.Debug("token: ending tidy operation on tokens") - ts.logger.Debug("token_store: number of tokens scanned in parent list", "total", countParentList) - ts.logger.Debug("token_store: number of tokens revoked in parent list", "deleted_count", deletedCountParentList) - ts.logger.Debug("token_store: number of accessor scanned", "total", countAccessorList) - ts.logger.Debug("token_store: number of deleted accessors which had empty tokens", "deleted_count", deletedCountAccessorEmptyToken) - ts.logger.Debug("token_store: number of revoked tokens which were invalid but present in accessors", "deleted_count", deletedCountInvalidTokenInAccessor) - ts.logger.Debug("token_store: number of deleted accessors which had invalid tokens", "deleted_count", deletedCountAccessorInvalidToken) + ts.logger.Debug("token: number of tokens scanned in parent list", "total", countParentList) + ts.logger.Debug("token: number of tokens revoked in parent list", "deleted_count", deletedCountParentList) + ts.logger.Debug("token: number of accessor scanned", "total", countAccessorList) + ts.logger.Debug("token: number of deleted accessors which had empty tokens", "deleted_count", deletedCountAccessorEmptyToken) + ts.logger.Debug("token: number of revoked tokens which were invalid but present in accessors", "deleted_count", deletedCountInvalidTokenInAccessor) + ts.logger.Debug("token: number of deleted accessors which had invalid tokens", "deleted_count", deletedCountAccessorInvalidToken) return nil, tidyErrors.ErrorOrNil() } From 415b0a2dd7fcc708de306a13ad95412fb229a11a Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Wed, 3 May 2017 12:17:09 -0400 Subject: [PATCH 26/41] Two things: 1) Ensure that if we fail to generate a lease for a secret we attempt to revoke it 2) Ensure that any lease that is registered should never have a blank token In theory, number 2 will let us a) find places where this *is* the case, and b) if errors are encountered when revoking tokens due to a blank client token, it suggests that the client token values are being stripped somewhere along the way, which is also instructive. --- vault/expiration.go | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 4e970bbcdb9c..ad5346249f5e 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -12,6 +12,7 @@ import ( "github.com/armon/go-metrics" log "github.com/mgutz/logxi/v1" + "github.com/hashicorp/errwrap" multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-uuid" "github.com/hashicorp/vault/helper/consts" @@ -604,7 +605,7 @@ func (m *ExpirationManager) RenewToken(req *logical.Request, source string, toke // Register is used to take a request and response with an associated // lease. The secret gets assigned a LeaseID and the management of // of lease is assumed by the expiration manager. -func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Response) (string, error) { +func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Response) (leaseID string, retErr error) { defer metrics.MeasureSince([]string{"expire", "register"}, time.Now()) // Ignore if there is no leased secret if resp == nil || resp.Secret == nil { @@ -616,6 +617,19 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons return "", err } + defer func() { + if retErr != nil { + err := m.router.Route(logical.RevokeRequest(req.Path, resp.Secret, resp.Data)) + if err != nil { + retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", err)) + } + } + }() + + if req.ClientToken == "" { + return "", fmt.Errorf("expiration: cannot register a lease with an empty client token") + } + // Create a lease entry leaseUUID, err := uuid.GenerateUUID() if err != nil { @@ -654,6 +668,10 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons func (m *ExpirationManager) RegisterAuth(source string, auth *logical.Auth) error { defer metrics.MeasureSince([]string{"expire", "register-auth"}, time.Now()) + if auth.ClientToken == "" { + return fmt.Errorf("expiration: cannot register an auth lease with an empty token") + } + // Create a lease entry le := leaseEntry{ LeaseID: path.Join(source, m.tokenStore.SaltID(auth.ClientToken)), @@ -774,7 +792,7 @@ func (m *ExpirationManager) revokeEntry(le *leaseEntry) error { // Revocation of login tokens is special since we can by-pass the // backend and directly interact with the token store if le.Auth != nil { - if err := m.tokenStore.RevokeTree(le.Auth.ClientToken); err != nil { + if err := m.tokenStore.RevokeTree(le.ClientToken); err != nil { return fmt.Errorf("failed to revoke token: %v", err) } From 0bda5a7d616bc8fb46c640b071eb881fa94f4348 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Wed, 3 May 2017 12:36:10 -0400 Subject: [PATCH 27/41] Adhere to tainted status in salted accessor lookup --- vault/expiration.go | 4 +++- vault/token_store.go | 12 ++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index ad5346249f5e..fec2e79af351 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -619,9 +619,11 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons defer func() { if retErr != nil { - err := m.router.Route(logical.RevokeRequest(req.Path, resp.Secret, resp.Data)) + revResp, err := m.router.Route(logical.RevokeRequest(req.Path, resp.Secret, resp.Data)) if err != nil { retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", err)) + } else if revResp != nil && revResp.IsError() { + retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", revResp.Error())) } } }() diff --git a/vault/token_store.go b/vault/token_store.go index 9521f1f9e4f9..208883a5517c 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -618,7 +618,7 @@ func (ts *TokenStore) tokenStoreAccessorList( ret := make([]string, 0, len(entries)) for _, entry := range entries { - aEntry, err := ts.lookupBySaltedAccessor(entry) + aEntry, err := ts.lookupBySaltedAccessor(entry, false) if err != nil { resp.AddWarning("Found an accessor entry that could not be successfully decoded") continue @@ -1073,10 +1073,10 @@ func (ts *TokenStore) handleCreateAgainstRole( } func (ts *TokenStore) lookupByAccessor(accessor string) (accessorEntry, error) { - return ts.lookupBySaltedAccessor(ts.SaltID(accessor)) + return ts.lookupBySaltedAccessor(ts.SaltID(accessor), false) } -func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEntry, error) { +func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string, tainted bool) (accessorEntry, error) { entry, err := ts.view.Get(accessorPrefix + saltedAccessor) var aEntry accessorEntry @@ -1090,8 +1090,7 @@ func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEnt err = jsonutil.DecodeJSON(entry.Value, &aEntry) // If we hit an error, assume it's a pre-struct straight token ID if err != nil { - aEntry.TokenID = string(entry.Value) - te, err := ts.lookupSalted(ts.SaltID(aEntry.TokenID), false) + te, err := ts.lookupSalted(ts.SaltID(string(entry.Value)), tainted) if err != nil { return accessorEntry{}, fmt.Errorf("failed to look up token using accessor index: %s", err) } @@ -1101,6 +1100,7 @@ func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string) (accessorEnt // on lookup is nil, not an error, so we keep that behavior here to be // safe...the token ID is simply not filled in. if te != nil { + aEntry.TokenID = te.ID aEntry.AccessorID = te.Accessor } } @@ -1180,7 +1180,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData ts.logger.Debug("token: checking if accessors contain valid tokens", "progress", countAccessorList) } - accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor) + accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor, true) if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor entry: %v", err)) continue From 8d35f928452e2609691e139fa147cf7b3d690020 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Wed, 3 May 2017 12:45:22 -0400 Subject: [PATCH 28/41] consistent logging --- vault/token_store.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vault/token_store.go b/vault/token_store.go index 208883a5517c..683f7ab10d75 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1141,14 +1141,14 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData for _, parent := range parentList { children, err := ts.view.List(parentPrefix + parent + "/") if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read child index entry: %v", err)) + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read secondary index entry: %v", err)) continue } for _, child := range children { countParentList++ if countParentList%500 == 0 { - ts.logger.Debug("token: checking validity of tokens in parent list", "progress", countParentList) + ts.logger.Debug("token: checking validity of tokens in secondary index list", "progress", countParentList) } // Look up tainted entries so we can be sure that if this isn't From a2e431bf7c7671c236a57c3c1287dbed4575b044 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Wed, 3 May 2017 12:46:54 -0400 Subject: [PATCH 29/41] Added logs when deletion fails so we can rely on server logs --- vault/token_store.go | 1 + 1 file changed, 1 insertion(+) diff --git a/vault/token_store.go b/vault/token_store.go index 683f7ab10d75..6d2c28c20e35 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1159,6 +1159,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData ts.logger.Debug("token: deleting invalid secondary index entry", "salted_token", child) err = ts.view.Delete(parentPrefix + parent + "/" + child) if err != nil { + ts.logger.Debug("token: failed to delete secondary index entry", "err", err) tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err)) } deletedCountParentList++ From 2d21bf657e98f86c9c07976b7750bd6e2a2f7fd5 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Wed, 3 May 2017 12:58:10 -0400 Subject: [PATCH 30/41] logging updates --- vault/token_store.go | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/vault/token_store.go b/vault/token_store.go index 6d2c28c20e35..e46cf0c00588 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1124,7 +1124,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // List out all the accessors saltedAccessorList, err := ts.view.List(accessorPrefix) if err != nil { - return nil, fmt.Errorf("failed to fetch accessor entries: %v", err) + return nil, fmt.Errorf("failed to fetch accessor index entries: %v", err) } // First, clean up secondary index entries that are no longer valid @@ -1141,7 +1141,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData for _, parent := range parentList { children, err := ts.view.List(parentPrefix + parent + "/") if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read secondary index entry: %v", err)) + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read secondary index: %v", err)) continue } @@ -1156,11 +1156,12 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // since appropriate locks cannot be held with salted token IDs. te, _ := ts.lookupSalted(child, true) if te == nil { - ts.logger.Debug("token: deleting invalid secondary index entry", "salted_token", child) - err = ts.view.Delete(parentPrefix + parent + "/" + child) + index := parentPrefix + parent + "/" + child + ts.logger.Debug("token: deleting invalid secondary index", "index", index) + err = ts.view.Delete(parentPrefix + parent + "/" + index) if err != nil { - ts.logger.Debug("token: failed to delete secondary index entry", "err", err) - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index entry: %v", err)) + ts.logger.Debug("token: failed to delete secondary index", "err", err) + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index: %v", err)) } deletedCountParentList++ } @@ -1183,7 +1184,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor, true) if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor entry: %v", err)) + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read the accessor index: %v", err)) continue } @@ -1191,13 +1192,15 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // in it. If not, it is an invalid accessor entry and needs to // be deleted. if accessorEntry.TokenID == "" { - ts.logger.Debug("token: deleting accessor index with invalid token ID", "accessor_id", accessorEntry.AccessorID) + index := accessorPrefix + saltedAccessor + ts.logger.Debug("token: deleting accessor index with invalid token ID", "accessor_id", accessorEntry.AccessorID, "index", index) // If deletion of accessor fails, move on to the next // item since this is just a best-effort operation - err = ts.view.Delete(accessorPrefix + saltedAccessor) + err = ts.view.Delete(index) if err != nil { - tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete the accessor entry: %v", err)) + ts.logger.Debug("token: failed to delete the accessor index", "err", err) + tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete the accessor index: %v", err)) continue } deletedCountAccessorEmptyToken++ @@ -1229,12 +1232,14 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // the leases associated with the token. err := ts.expiration.RevokeByToken(tokenEntry) if err != nil { + ts.logger.Debug("token: failed to revoke leases of expired token", "err", err) tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke leases of expired token: %v", err)) continue } deletedCountInvalidTokenInAccessor++ - ts.logger.Debug("token: deleting accessor index of the token with nil entry", "accessor_id", accessorEntry.AccessorID) + index := accessorPrefix + saltedAccessor + ts.logger.Debug("token: deleting accessor index of the token with nil entry", "accessor_id", accessorEntry.AccessorID, "index", index) // If deletion of accessor fails, move on to the next item since // this is just a best-effort operation. We do this last so that on @@ -1242,6 +1247,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // entry to try again. err = ts.view.Delete(accessorPrefix + saltedAccessor) if err != nil { + ts.logger.Debug("token: failed to delete accessor index", "err", err) tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete accessor entry: %v", err)) continue } @@ -1251,9 +1257,9 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData ts.logger.Debug("token: ending tidy operation on tokens") - ts.logger.Debug("token: number of tokens scanned in parent list", "total", countParentList) - ts.logger.Debug("token: number of tokens revoked in parent list", "deleted_count", deletedCountParentList) - ts.logger.Debug("token: number of accessor scanned", "total", countAccessorList) + ts.logger.Debug("token: number of tokens scanned in parent index list", "total", countParentList) + ts.logger.Debug("token: number of tokens revoked in parent index list", "deleted_count", deletedCountParentList) + ts.logger.Debug("token: number of accessors scanned", "total", countAccessorList) ts.logger.Debug("token: number of deleted accessors which had empty tokens", "deleted_count", deletedCountAccessorEmptyToken) ts.logger.Debug("token: number of revoked tokens which were invalid but present in accessors", "deleted_count", deletedCountInvalidTokenInAccessor) ts.logger.Debug("token: number of deleted accessors which had invalid tokens", "deleted_count", deletedCountAccessorInvalidToken) From 5bc47b0c6042baa8784edd84408e694be0553f94 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Wed, 3 May 2017 13:08:47 -0400 Subject: [PATCH 31/41] Add taint flag for looking up by accessor --- vault/logical_system.go | 2 +- vault/token_store.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/vault/logical_system.go b/vault/logical_system.go index a3f6ee44e98d..e45a505744a2 100644 --- a/vault/logical_system.go +++ b/vault/logical_system.go @@ -838,7 +838,7 @@ func (b *SystemBackend) handleCapabilitiesAccessor(req *logical.Request, d *fram return logical.ErrorResponse("missing accessor"), nil } - aEntry, err := b.Core.tokenStore.lookupByAccessor(accessor) + aEntry, err := b.Core.tokenStore.lookupByAccessor(accessor, false) if err != nil { return nil, err } diff --git a/vault/token_store.go b/vault/token_store.go index e46cf0c00588..c8cc24f2b437 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1072,8 +1072,8 @@ func (ts *TokenStore) handleCreateAgainstRole( return ts.handleCreateCommon(req, d, false, roleEntry) } -func (ts *TokenStore) lookupByAccessor(accessor string) (accessorEntry, error) { - return ts.lookupBySaltedAccessor(ts.SaltID(accessor), false) +func (ts *TokenStore) lookupByAccessor(accessor string, tainted bool) (accessorEntry, error) { + return ts.lookupBySaltedAccessor(ts.SaltID(accessor), tainted) } func (ts *TokenStore) lookupBySaltedAccessor(saltedAccessor string, tainted bool) (accessorEntry, error) { @@ -1280,7 +1280,7 @@ func (ts *TokenStore) handleUpdateLookupAccessor(req *logical.Request, data *fra urlaccessor = true } - aEntry, err := ts.lookupByAccessor(accessor) + aEntry, err := ts.lookupByAccessor(accessor, false) if err != nil { return nil, err } @@ -1334,7 +1334,7 @@ func (ts *TokenStore) handleUpdateRevokeAccessor(req *logical.Request, data *fra urlaccessor = true } - aEntry, err := ts.lookupByAccessor(accessor) + aEntry, err := ts.lookupByAccessor(accessor, true) if err != nil { return nil, err } From b0c4a7e26657d92e8d24a865dcfb0b497e80d2ed Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Wed, 3 May 2017 14:29:57 -0400 Subject: [PATCH 32/41] Add more cleanup if a lease fails to register and revoke tokens if registerauth fails --- vault/expiration.go | 25 ++++++++++++++++++------- vault/request_handling.go | 4 +++- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index fec2e79af351..d42c7b35b6db 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -605,7 +605,7 @@ func (m *ExpirationManager) RenewToken(req *logical.Request, source string, toke // Register is used to take a request and response with an associated // lease. The secret gets assigned a LeaseID and the management of // of lease is assumed by the expiration manager. -func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Response) (leaseID string, retErr error) { +func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Response) (id string, retErr error) { defer metrics.MeasureSince([]string{"expire", "register"}, time.Now()) // Ignore if there is no leased secret if resp == nil || resp.Secret == nil { @@ -617,6 +617,14 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons return "", err } + // Create a lease entry + leaseUUID, err := uuid.GenerateUUID() + if err != nil { + return "", err + } + + leaseID := path.Join(req.Path, leaseUUID) + defer func() { if retErr != nil { revResp, err := m.router.Route(logical.RevokeRequest(req.Path, resp.Secret, resp.Data)) @@ -625,6 +633,14 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons } else if revResp != nil && revResp.IsError() { retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", revResp.Error())) } + + if err := m.deleteEntry(leaseID); err != nil { + retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", err)) + } + + if err := m.removeIndexByToken(req.ClientToken, leaseID); err != nil { + retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", err)) + } } }() @@ -632,13 +648,8 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons return "", fmt.Errorf("expiration: cannot register a lease with an empty client token") } - // Create a lease entry - leaseUUID, err := uuid.GenerateUUID() - if err != nil { - return "", err - } le := leaseEntry{ - LeaseID: path.Join(req.Path, leaseUUID), + LeaseID: leaseID, ClientToken: req.ClientToken, Path: req.Path, Data: resp.Data, diff --git a/vault/request_handling.go b/vault/request_handling.go index ad37b5aee0b6..d218d9967467 100644 --- a/vault/request_handling.go +++ b/vault/request_handling.go @@ -288,10 +288,11 @@ func (c *Core) handleRequest(req *logical.Request) (retResp *logical.Response, r if err != nil { c.logger.Error("core: failed to look up token", "error", err) retErr = multierror.Append(retErr, ErrInternalError) - return nil, nil, retErr + return nil, auth, retErr } if err := c.expiration.RegisterAuth(te.Path, resp.Auth); err != nil { + c.tokenStore.Revoke(te.ID) c.logger.Error("core: failed to register token lease", "request_path", req.Path, "error", err) retErr = multierror.Append(retErr, ErrInternalError) return nil, auth, retErr @@ -439,6 +440,7 @@ func (c *Core) handleLoginRequest(req *logical.Request) (*logical.Response, *log // Register with the expiration manager if err := c.expiration.RegisterAuth(te.Path, auth); err != nil { + c.tokenStore.Revoke(te.ID) c.logger.Error("core: failed to register token lease", "request_path", req.Path, "error", err) return nil, auth, ErrInternalError } From 2f6e924c292ff8f15bfb189674aafafb419bd9d7 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Wed, 3 May 2017 15:09:13 -0400 Subject: [PATCH 33/41] Fix substitution of index/child in delete call --- vault/token_store.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vault/token_store.go b/vault/token_store.go index e2be03442fe9..e4c572e8a0b1 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1139,7 +1139,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // Scan through the secondary index entries; if there is an entry // with the token's salt ID at the end, remove it for _, parent := range parentList { - children, err := ts.view.List(parentPrefix + parent + "/") + children, err := ts.view.List(parentPrefix + parent) if err != nil { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to read secondary index: %v", err)) continue @@ -1156,9 +1156,9 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // since appropriate locks cannot be held with salted token IDs. te, _ := ts.lookupSalted(child, true) if te == nil { - index := parentPrefix + parent + "/" + child + index := parentPrefix + parent + child ts.logger.Debug("token: deleting invalid secondary index", "index", index) - err = ts.view.Delete(parentPrefix + parent + "/" + index) + err = ts.view.Delete(index) if err != nil { ts.logger.Debug("token: failed to delete secondary index", "err", err) tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index: %v", err)) From 5320da03bbbe8e3d4ed451fde6846c998707d322 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Thu, 4 May 2017 09:40:11 -0400 Subject: [PATCH 34/41] Move tidy-leases to leases/tidy --- vault/logical_system.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/vault/logical_system.go b/vault/logical_system.go index a2a23013e109..bb7197151396 100644 --- a/vault/logical_system.go +++ b/vault/logical_system.go @@ -75,17 +75,6 @@ func NewSystemBackend(core *Core, config *logical.BackendConfig) (logical.Backen }, Paths: []*framework.Path{ - &framework.Path{ - Pattern: "tidy-leases$", - - Callbacks: map[logical.Operation]framework.OperationFunc{ - logical.UpdateOperation: b.handleTidyLeases, - }, - - HelpSynopsis: strings.TrimSpace(sysHelp["tidy_leases"][0]), - HelpDescription: strings.TrimSpace(sysHelp["tidy_leases"][1]), - }, - &framework.Path{ Pattern: "capabilities-accessor$", @@ -432,6 +421,17 @@ func NewSystemBackend(core *Core, config *logical.BackendConfig) (logical.Backen HelpDescription: strings.TrimSpace(sysHelp["revoke-prefix"][1]), }, + &framework.Path{ + Pattern: "leases/tidy$", + + Callbacks: map[logical.Operation]framework.OperationFunc{ + logical.UpdateOperation: b.handleTidyLeases, + }, + + HelpSynopsis: strings.TrimSpace(sysHelp["tidy_leases"][0]), + HelpDescription: strings.TrimSpace(sysHelp["tidy_leases"][1]), + }, + &framework.Path{ Pattern: "auth$", From cacf07210b5689d747d876e19336dc3090be97db Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Thu, 4 May 2017 11:54:57 -0400 Subject: [PATCH 35/41] Update commenting --- vault/expiration.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vault/expiration.go b/vault/expiration.go index 0501ff19ac7b..bd0a1e80bdf8 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -626,6 +626,10 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons leaseID := path.Join(req.Path, leaseUUID) defer func() { + // If there is an error we want to rollback as much as possible (note + // that errors here are ignored to do as much cleanup as we can). We + // want to revoke a generated secret (since an error means we may not + // be successfully tracking it), remove indexes, and delete the entry. if retErr != nil { revResp, err := m.router.Route(logical.RevokeRequest(req.Path, resp.Secret, resp.Data)) if err != nil { From 4de09fbad99f156b0fa82af4782d1c0aa2c0ebdd Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Thu, 4 May 2017 12:11:00 -0400 Subject: [PATCH 36/41] Update Tidy function comment --- vault/expiration.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/vault/expiration.go b/vault/expiration.go index bd0a1e80bdf8..2d3adfc68183 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -120,7 +120,12 @@ func (c *Core) stopExpiration() error { return nil } -// Tidy cleans up stale leases which are associated with invalid tokens +// Tidy cleans up the dangling storage entries for leases. It scans the storage +// view to find all the available leases, checks if the token embedded in it is +// either empty or invalid and in both the cases, it revokes them. It also uses +// a token cache to avoid multiple lookups of the same token ID. It is normally +// not required to use the API that invokes this. This is only intended to +// clean up the corrupt storage due to bugs. func (m *ExpirationManager) Tidy() error { var tidyErrors *multierror.Error From 106f08a0ffa2867323091b8c5160ddbd01c50a55 Mon Sep 17 00:00:00 2001 From: vishalnayak Date: Thu, 4 May 2017 12:41:15 -0400 Subject: [PATCH 37/41] Fix up the tests --- vault/expiration.go | 2 +- vault/expiration_test.go | 46 +++++++++++++++++++++++++-------------- vault/token_store_test.go | 6 ++--- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 2d3adfc68183..6268031fc654 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -196,8 +196,8 @@ func (m *ExpirationManager) Tidy() error { return } else { m.logger.Debug("expiration: revoking lease which contains an invalid token", "lease_id", leaseID) - deletedCountInvalidToken++ revokeLease = true + deletedCountInvalidToken++ } } diff --git a/vault/expiration_test.go b/vault/expiration_test.go index 92ae92fdc7d4..7598611d9eee 100644 --- a/vault/expiration_test.go +++ b/vault/expiration_test.go @@ -221,6 +221,12 @@ func TestExpiration_Tidy(t *testing.T) { t.Fatalf("error persisting entry: %v", err) } + // Run the tidy operation + err = exp.Tidy() + if err != nil { + t.Fatal(err) + } + count = 0 if err = logical.ScanView(exp.idView, countFunc); err != nil { t.Fatal(err) @@ -344,8 +350,9 @@ func TestExpiration_Restore(t *testing.T) { } for _, path := range paths { req := &logical.Request{ - Operation: logical.ReadOperation, - Path: path, + Operation: logical.ReadOperation, + Path: path, + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -399,8 +406,9 @@ func TestExpiration_Restore(t *testing.T) { func TestExpiration_Register(t *testing.T) { exp := mockExpiration(t) req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "prod/aws/foo", + Operation: logical.ReadOperation, + Path: "prod/aws/foo", + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -498,8 +506,9 @@ func TestExpiration_Revoke(t *testing.T) { exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view) req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "prod/aws/foo", + Operation: logical.ReadOperation, + Path: "prod/aws/foo", + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -540,8 +549,9 @@ func TestExpiration_RevokeOnExpire(t *testing.T) { exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view) req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "prod/aws/foo", + Operation: logical.ReadOperation, + Path: "prod/aws/foo", + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -599,8 +609,9 @@ func TestExpiration_RevokePrefix(t *testing.T) { } for _, path := range paths { req := &logical.Request{ - Operation: logical.ReadOperation, - Path: path, + Operation: logical.ReadOperation, + Path: path, + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -787,8 +798,9 @@ func TestExpiration_Renew(t *testing.T) { exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view) req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "prod/aws/foo", + Operation: logical.ReadOperation, + Path: "prod/aws/foo", + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -853,8 +865,9 @@ func TestExpiration_Renew_NotRenewable(t *testing.T) { exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view) req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "prod/aws/foo", + Operation: logical.ReadOperation, + Path: "prod/aws/foo", + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ @@ -899,8 +912,9 @@ func TestExpiration_Renew_RevokeOnExpire(t *testing.T) { exp.router.Mount(noop, "prod/aws/", &MountEntry{UUID: meUUID}, view) req := &logical.Request{ - Operation: logical.ReadOperation, - Path: "prod/aws/foo", + Operation: logical.ReadOperation, + Path: "prod/aws/foo", + ClientToken: "foobar", } resp := &logical.Response{ Secret: &logical.Secret{ diff --git a/vault/token_store_test.go b/vault/token_store_test.go index 7a84fe737f2c..d802080d1f8d 100644 --- a/vault/token_store_test.go +++ b/vault/token_store_test.go @@ -240,7 +240,7 @@ func TestTokenStore_AccessorIndex(t *testing.T) { t.Fatalf("bad: %#v", out) } - aEntry, err := ts.lookupByAccessor(out.Accessor) + aEntry, err := ts.lookupByAccessor(out.Accessor, false) if err != nil { t.Fatalf("err: %s", err) } @@ -318,7 +318,7 @@ func TestTokenStore_HandleRequest_ListAccessors(t *testing.T) { // Test upgrade from old struct method of accessor storage (of token id) for _, accessor := range keys { - aEntry, err := ts.lookupByAccessor(accessor) + aEntry, err := ts.lookupByAccessor(accessor, false) if err != nil { t.Fatal(err) } @@ -352,7 +352,7 @@ func TestTokenStore_HandleRequest_ListAccessors(t *testing.T) { } for _, accessor := range keys2 { - aEntry, err := ts.lookupByAccessor(accessor) + aEntry, err := ts.lookupByAccessor(accessor, false) if err != nil { t.Fatal(err) } From 782910736c6300eb46eb4619aae2fc370460cdc6 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Thu, 4 May 2017 12:44:31 -0400 Subject: [PATCH 38/41] Update comments --- vault/expiration.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 6268031fc654..553407c334b6 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -638,17 +638,17 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons if retErr != nil { revResp, err := m.router.Route(logical.RevokeRequest(req.Path, resp.Secret, resp.Data)) if err != nil { - retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", err)) + retErr = multierror.Append(retErr, errwrap.Wrapf("an additional internal error was encountered revoking the newly-generated secret: {{err}}", err)) } else if revResp != nil && revResp.IsError() { retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", revResp.Error())) } if err := m.deleteEntry(leaseID); err != nil { - retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", err)) + retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered deleting any lease associated with the newly-generated secret: {{err}}", err)) } if err := m.removeIndexByToken(req.ClientToken, leaseID); err != nil { - retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered revoking the newly-generated secret: {{err}}", err)) + retErr = multierror.Append(retErr, errwrap.Wrapf("an additional error was encountered removing lease indexes associated with the newly-generated secret: {{err}}", err)) } } }() From 1378dd5c6a09d44e3eff1096f08f38ccf283f121 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Thu, 4 May 2017 12:45:57 -0400 Subject: [PATCH 39/41] Move client token check in exp register to top --- vault/expiration.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 553407c334b6..1bdeb4342872 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -612,6 +612,11 @@ func (m *ExpirationManager) RenewToken(req *logical.Request, source string, toke // of lease is assumed by the expiration manager. func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Response) (id string, retErr error) { defer metrics.MeasureSince([]string{"expire", "register"}, time.Now()) + + if req.ClientToken == "" { + return "", fmt.Errorf("expiration: cannot register a lease with an empty client token") + } + // Ignore if there is no leased secret if resp == nil || resp.Secret == nil { return "", nil @@ -653,10 +658,6 @@ func (m *ExpirationManager) Register(req *logical.Request, resp *logical.Respons } }() - if req.ClientToken == "" { - return "", fmt.Errorf("expiration: cannot register a lease with an empty client token") - } - le := leaseEntry{ LeaseID: leaseID, ClientToken: req.ClientToken, From 5dde45d67718e4e63c8b872941c521160d7f24bf Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Fri, 5 May 2017 10:26:40 -0400 Subject: [PATCH 40/41] Address feedback --- vault/expiration.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vault/expiration.go b/vault/expiration.go index 1bdeb4342872..9c1e2105edc5 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -191,6 +191,7 @@ func (m *ExpirationManager) Tidy() error { } else { tokenCache[le.ClientToken] = true } + goto REVOKE_CHECK } else { if isValid { return @@ -199,6 +200,7 @@ func (m *ExpirationManager) Tidy() error { revokeLease = true deletedCountInvalidToken++ } + goto REVOKE_CHECK } REVOKE_CHECK: From e61298e6d418220133199c3450fedca32acface0 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Fri, 5 May 2017 10:48:12 -0400 Subject: [PATCH 41/41] Update debugging around tidy --- vault/expiration.go | 27 +++++++++++++------------- vault/token_store.go | 46 ++++++++++++++++++-------------------------- 2 files changed, 32 insertions(+), 41 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 9c1e2105edc5..73b47b4090dd 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -130,24 +130,23 @@ func (m *ExpirationManager) Tidy() error { var tidyErrors *multierror.Error if !atomic.CompareAndSwapInt64(&m.tidyLock, 0, 1) { - m.logger.Debug("expiration: tidy operation on leases is already in progress") + m.logger.Warn("expiration: tidy operation on leases is already in progress") return fmt.Errorf("tidy operation on leases is already in progress") } defer atomic.CompareAndSwapInt64(&m.tidyLock, 1, 0) - m.logger.Debug("expiration: beginning tidy operation on leases") + m.logger.Info("expiration: beginning tidy operation on leases") + defer m.logger.Info("expiration: finished tidy operation on leases") // Create a cache to keep track of looked up tokens tokenCache := make(map[string]bool) - countLease := 0 - deletedCountEmptyToken := 0 - deletedCountInvalidToken := 0 + var countLease, revokedCount, deletedCountInvalidToken, deletedCountEmptyToken int64 tidyFunc := func(leaseID string) { countLease++ if countLease%500 == 0 { - m.logger.Debug("expiration: tidying leases", "progress", countLease) + m.logger.Info("expiration: tidying leases", "progress", countLease) } le, err := m.loadEntry(leaseID) @@ -164,7 +163,7 @@ func (m *ExpirationManager) Tidy() error { var isValid, ok bool revokeLease := false if le.ClientToken == "" { - m.logger.Debug("expiration: revoking lease which has an empty token", "lease_id", leaseID) + m.logger.Trace("expiration: revoking lease which has an empty token", "lease_id", leaseID) revokeLease = true deletedCountEmptyToken++ goto REVOKE_CHECK @@ -184,7 +183,7 @@ func (m *ExpirationManager) Tidy() error { } if te == nil { - m.logger.Debug("expiration: revoking lease which holds an invalid token", "lease_id", leaseID) + m.logger.Trace("expiration: revoking lease which holds an invalid token", "lease_id", leaseID) revokeLease = true deletedCountInvalidToken++ tokenCache[le.ClientToken] = false @@ -196,7 +195,7 @@ func (m *ExpirationManager) Tidy() error { if isValid { return } else { - m.logger.Debug("expiration: revoking lease which contains an invalid token", "lease_id", leaseID) + m.logger.Trace("expiration: revoking lease which contains an invalid token", "lease_id", leaseID) revokeLease = true deletedCountInvalidToken++ } @@ -212,6 +211,7 @@ func (m *ExpirationManager) Tidy() error { tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke an invalid lease with ID %q: %v", leaseID, err)) return } + revokedCount++ } } @@ -219,11 +219,10 @@ func (m *ExpirationManager) Tidy() error { return err } - m.logger.Debug("expiration: ending tidy operation on leases") - - m.logger.Debug("expiration: number of leases scanned", "total", countLease) - m.logger.Debug("expiration: number of revoked leases which had empty tokens", "deleted_count", deletedCountEmptyToken) - m.logger.Debug("expiration: number of revoked leases which had invalid tokens", "deleted_count", deletedCountInvalidToken) + m.logger.Debug("expiration: number of leases scanned", "count", countLease) + m.logger.Debug("expiration: number of leases which had empty tokens", "count", deletedCountEmptyToken) + m.logger.Debug("expiration: number of leases which had invalid tokens", "count", deletedCountInvalidToken) + m.logger.Debug("expiration: number of leases successfully revoked", "count", revokedCount) return tidyErrors.ErrorOrNil() } diff --git a/vault/token_store.go b/vault/token_store.go index 7678dce99230..499d19b0200a 100644 --- a/vault/token_store.go +++ b/vault/token_store.go @@ -1114,13 +1114,15 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData var tidyErrors *multierror.Error if !atomic.CompareAndSwapInt64(&ts.tidyLock, 0, 1) { - ts.logger.Debug("token: tidy operation on tokens is already in progress") + ts.logger.Warn("token: tidy operation on tokens is already in progress") return nil, fmt.Errorf("tidy operation on tokens is already in progress") } defer atomic.CompareAndSwapInt64(&ts.tidyLock, 1, 0) ts.logger.Debug("token: beginning tidy operation on tokens") + defer ts.logger.Debug("token: finished tidy operation on tokens") + // List out all the accessors saltedAccessorList, err := ts.view.List(accessorPrefix) if err != nil { @@ -1133,8 +1135,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData return nil, fmt.Errorf("failed to fetch secondary index entries: %v", err) } - countParentList := 0 - deletedCountParentList := 0 + var countParentList, deletedCountParentList int64 // Scan through the secondary index entries; if there is an entry // with the token's salt ID at the end, remove it @@ -1148,7 +1149,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData for _, child := range children { countParentList++ if countParentList%500 == 0 { - ts.logger.Debug("token: checking validity of tokens in secondary index list", "progress", countParentList) + ts.logger.Info("token: checking validity of tokens in secondary index list", "progress", countParentList) } // Look up tainted entries so we can be sure that if this isn't @@ -1157,10 +1158,9 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData te, _ := ts.lookupSalted(child, true) if te == nil { index := parentPrefix + parent + child - ts.logger.Debug("token: deleting invalid secondary index", "index", index) + ts.logger.Trace("token: deleting invalid secondary index", "index", index) err = ts.view.Delete(index) if err != nil { - ts.logger.Debug("token: failed to delete secondary index", "err", err) tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete secondary index: %v", err)) } deletedCountParentList++ @@ -1168,10 +1168,10 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData } } - countAccessorList := 0 - deletedCountAccessorEmptyToken := 0 - deletedCountAccessorInvalidToken := 0 - deletedCountInvalidTokenInAccessor := 0 + var countAccessorList, + deletedCountAccessorEmptyToken, + deletedCountAccessorInvalidToken, + deletedCountInvalidTokenInAccessor int64 // For each of the accessor, see if the token ID associated with it is // a valid one. If not, delete the leases associated with that token @@ -1179,7 +1179,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData for _, saltedAccessor := range saltedAccessorList { countAccessorList++ if countAccessorList%500 == 0 { - ts.logger.Debug("token: checking if accessors contain valid tokens", "progress", countAccessorList) + ts.logger.Info("token: checking if accessors contain valid tokens", "progress", countAccessorList) } accessorEntry, err := ts.lookupBySaltedAccessor(saltedAccessor, true) @@ -1193,13 +1193,10 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // be deleted. if accessorEntry.TokenID == "" { index := accessorPrefix + saltedAccessor - ts.logger.Debug("token: deleting accessor index with invalid token ID", "accessor_id", accessorEntry.AccessorID, "index", index) - // If deletion of accessor fails, move on to the next // item since this is just a best-effort operation err = ts.view.Delete(index) if err != nil { - ts.logger.Debug("token: failed to delete the accessor index", "err", err) tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete the accessor index: %v", err)) continue } @@ -1219,7 +1216,7 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // more and conclude that accessor, leases, and secondary index entries // for this token should not exist as well. if te == nil { - ts.logger.Debug("token: deleting token with nil entry", "salted_token", saltedId) + ts.logger.Info("token: deleting token with nil entry", "salted_token", saltedId) // RevokeByToken expects a '*TokenEntry'. For the // purposes of tidying, it is sufficient if the token @@ -1232,22 +1229,19 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData // the leases associated with the token. err := ts.expiration.RevokeByToken(tokenEntry) if err != nil { - ts.logger.Debug("token: failed to revoke leases of expired token", "err", err) tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to revoke leases of expired token: %v", err)) continue } deletedCountInvalidTokenInAccessor++ index := accessorPrefix + saltedAccessor - ts.logger.Debug("token: deleting accessor index of the token with nil entry", "accessor_id", accessorEntry.AccessorID, "index", index) // If deletion of accessor fails, move on to the next item since // this is just a best-effort operation. We do this last so that on // next run if something above failed we still have the accessor // entry to try again. - err = ts.view.Delete(accessorPrefix + saltedAccessor) + err = ts.view.Delete(index) if err != nil { - ts.logger.Debug("token: failed to delete accessor index", "err", err) tidyErrors = multierror.Append(tidyErrors, fmt.Errorf("failed to delete accessor entry: %v", err)) continue } @@ -1255,14 +1249,12 @@ func (ts *TokenStore) handleTidy(req *logical.Request, data *framework.FieldData } } - ts.logger.Debug("token: ending tidy operation on tokens") - - ts.logger.Debug("token: number of tokens scanned in parent index list", "total", countParentList) - ts.logger.Debug("token: number of tokens revoked in parent index list", "deleted_count", deletedCountParentList) - ts.logger.Debug("token: number of accessors scanned", "total", countAccessorList) - ts.logger.Debug("token: number of deleted accessors which had empty tokens", "deleted_count", deletedCountAccessorEmptyToken) - ts.logger.Debug("token: number of revoked tokens which were invalid but present in accessors", "deleted_count", deletedCountInvalidTokenInAccessor) - ts.logger.Debug("token: number of deleted accessors which had invalid tokens", "deleted_count", deletedCountAccessorInvalidToken) + ts.logger.Debug("token: number of tokens scanned in parent index list", "count", countParentList) + ts.logger.Debug("token: number of tokens revoked in parent index list", "count", deletedCountParentList) + ts.logger.Debug("token: number of accessors scanned", "count", countAccessorList) + ts.logger.Debug("token: number of deleted accessors which had empty tokens", "count", deletedCountAccessorEmptyToken) + ts.logger.Debug("token: number of revoked tokens which were invalid but present in accessors", "count", deletedCountInvalidTokenInAccessor) + ts.logger.Debug("token: number of deleted accessors which had invalid tokens", "count", deletedCountAccessorInvalidToken) return nil, tidyErrors.ErrorOrNil() }