From 7c8c3f5f93ec691d77968e1ef9f781af55144a1d Mon Sep 17 00:00:00 2001 From: Joel Thompson Date: Mon, 27 Aug 2018 16:35:56 -0400 Subject: [PATCH 1/4] logical/aws: Harden WAL entry creation If AWS IAM user creation failed in any way, the WAL corresponding to the IAM user would get left around and Vault would try to roll it back. However, because the user never existed, the rollback failed. Thus, the WAL would essentially get "stuck" and Vault would continually attempt to roll it back, failing every time. A similar situation could arise if the IAM user that Vault created got deleted out of band, or if Vault deleted it but was unable to write the lease revocation back to storage (e.g., a storage failure). This attempts to harden it in two ways. One is by deleting the WAL log entry if the IAM user creation fails. However, the WAL deletion could still fail, and this wouldn't help where the user is deleted out of band, so second, consider the user rolled back if the user just doesn't exist, under certain circumstances. Fixes #5190 --- builtin/logical/aws/path_user.go | 34 +++++++++++++++++++++++ builtin/logical/aws/secret_access_keys.go | 4 +++ logical/lease.go | 2 +- vault/expiration.go | 2 ++ 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/builtin/logical/aws/path_user.go b/builtin/logical/aws/path_user.go index 0d541546f70f..4462adc025d3 100644 --- a/builtin/logical/aws/path_user.go +++ b/builtin/logical/aws/path_user.go @@ -4,8 +4,10 @@ import ( "context" "fmt" "strings" + "time" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/iam" "github.com/hashicorp/errwrap" "github.com/hashicorp/vault/helper/strutil" @@ -130,6 +132,38 @@ func pathUserRollback(ctx context.Context, req *logical.Request, _kind string, d MaxItems: aws.Int64(1000), }) if err != nil { + // This isn't guaranteed to be perfect; for example, an IAM user + // might have gotten put into the WAL but then the IAM user creation + // failed (e.g., Vault didn't have permissions) and then the WAL + // deletion failed as well. Then, if Vault doesn't have access to + // call iam:ListGroupsForUser, AWS will return an access denied error + // and the WAL will never get cleaned up. But this is better than + // just having Vault "forget" about a user it actually created. + // + // BEWARE a potential race condition -- where this is called + // immediately after a user is created. AWS eventual consistency + // might say the user doesn't exist when the user does in fact + // exist, and this could cause Vault to forget about the user. + // This won't happen if the user creation fails (because the WAL + // minimum age is 5 minutes, and AWS eventual consistency is, in + // practice, never that long), but it could happen if a lease holder + // asks immediately after getting a user to revoke the lease, causing + // Vault to leake the secret, which would be a Very Bad Thing to allow. + // So we make sure that, if there's an associated lease, it must be at + // least 5 minutes old as well. + if aerr, ok := err.(awserr.Error); ok { + acceptMissingIamUsers := false + if req.Secret == nil { + // WAL rollback + acceptMissingIamUsers = true + } + if time.Since(req.Secret.IssueTime) > time.Duration(5*time.Minute) { + acceptMissingIamUsers = true + } + if aerr.Code() == iam.ErrCodeNoSuchEntityException && acceptMissingIamUsers { + return nil + } + } return err } groups := groupsResp.Groups diff --git a/builtin/logical/aws/secret_access_keys.go b/builtin/logical/aws/secret_access_keys.go index 57e1105da69b..6dc7abe16a16 100644 --- a/builtin/logical/aws/secret_access_keys.go +++ b/builtin/logical/aws/secret_access_keys.go @@ -184,6 +184,10 @@ func (b *backend) secretAccessKeysCreate( UserName: aws.String(username), }) if err != nil { + if walErr := framework.DeleteWAL(ctx, s, walId); walErr != nil { + iamErr := errwrap.Wrapf("error creating IAM user: {{err}}", err) + return nil, errwrap.Wrap(errwrap.Wrapf("failed to delete WAL entry: {{err}}", walErr), iamErr) + } return logical.ErrorResponse(fmt.Sprintf( "Error creating IAM user: %s", err)), nil } diff --git a/logical/lease.go b/logical/lease.go index 90c089476618..97bbe4f6582b 100644 --- a/logical/lease.go +++ b/logical/lease.go @@ -23,7 +23,7 @@ type LeaseOptions struct { Increment time.Duration `json:"-"` // IssueTime is the time of issue for the original lease. This is - // only available on a Renew operation and has no effect when returning + // only available on Renew and Revoke operations and has no effect when returning // a response. It can be used to enforce maximum lease periods by // a logical backend. IssueTime time.Time `json:"-"` diff --git a/vault/expiration.go b/vault/expiration.go index a1420f765643..55ee160cb976 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -1155,6 +1155,8 @@ func (m *ExpirationManager) revokeEntry(ctx context.Context, le *leaseEntry) err return nil } + le.Secret.IssueTime = le.IssueTime + // Handle standard revocation via backends resp, err := m.router.Route(m.quitContext, logical.RevokeRequest(le.Path, le.Secret, le.Data)) if err != nil || (resp != nil && resp.IsError()) { From 511a89d653d1a954177657c1cbc914ae78da3cf7 Mon Sep 17 00:00:00 2001 From: Joel Thompson Date: Mon, 27 Aug 2018 23:29:06 -0400 Subject: [PATCH 2/4] Fix segfault in expiration unit tests TestExpiration_Tidy was passing in a leaseEntry that had a nil Secret, which then caused a segfault as the changes to revokeEntry didn't check whether Secret was nil; this is probably unlikely to occur in real life, but good to be extra cautious. --- vault/expiration.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/vault/expiration.go b/vault/expiration.go index 55ee160cb976..ef7b6a35b560 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -1155,7 +1155,12 @@ func (m *ExpirationManager) revokeEntry(ctx context.Context, le *leaseEntry) err return nil } - le.Secret.IssueTime = le.IssueTime + if le.Secret != nil { + // not sure if this is really valid to have a leaseEntry with a nil Secret + // (if there's a nil Secret, what are you really leasing?), but the tests + // create one, and good to be defensive + le.Secret.IssueTime = le.IssueTime + } // Handle standard revocation via backends resp, err := m.router.Route(m.quitContext, logical.RevokeRequest(le.Path, le.Secret, le.Data)) From 03a8d3fd1d7fbab305ff1dcb82db0cb682688ebd Mon Sep 17 00:00:00 2001 From: Joel Thompson Date: Wed, 29 Aug 2018 22:22:47 -0400 Subject: [PATCH 3/4] Fix potential segfault Missed the else... --- builtin/logical/aws/path_user.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/builtin/logical/aws/path_user.go b/builtin/logical/aws/path_user.go index 4462adc025d3..093a13ef1aa1 100644 --- a/builtin/logical/aws/path_user.go +++ b/builtin/logical/aws/path_user.go @@ -156,8 +156,7 @@ func pathUserRollback(ctx context.Context, req *logical.Request, _kind string, d if req.Secret == nil { // WAL rollback acceptMissingIamUsers = true - } - if time.Since(req.Secret.IssueTime) > time.Duration(5*time.Minute) { + } else if time.Since(req.Secret.IssueTime) > time.Duration(5*time.Minute) { acceptMissingIamUsers = true } if aerr.Code() == iam.ErrCodeNoSuchEntityException && acceptMissingIamUsers { From 7267c9e4d3c1eebb8df545e11160f0088871db57 Mon Sep 17 00:00:00 2001 From: Joel Thompson Date: Thu, 13 Sep 2018 20:46:10 -0400 Subject: [PATCH 4/4] Respond to PR feedback --- builtin/logical/aws/backend.go | 4 +++- builtin/logical/aws/path_user.go | 6 ++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/builtin/logical/aws/backend.go b/builtin/logical/aws/backend.go index 1c1f04b6807f..4a7268f58970 100644 --- a/builtin/logical/aws/backend.go +++ b/builtin/logical/aws/backend.go @@ -45,7 +45,7 @@ func Backend() *backend { }, WALRollback: b.walRollback, - WALRollbackMinAge: 5 * time.Minute, + WALRollbackMinAge: minAwsUserRollbackAge, BackendType: logical.TypeLogical, } @@ -68,3 +68,5 @@ After mounting this backend, credentials to generate IAM keys must be configured with the "root" path and policies must be written using the "roles/" endpoints before any access keys can be generated. ` + +const minAwsUserRollbackAge = 5 * time.Minute diff --git a/builtin/logical/aws/path_user.go b/builtin/logical/aws/path_user.go index 093a13ef1aa1..12b7956507ad 100644 --- a/builtin/logical/aws/path_user.go +++ b/builtin/logical/aws/path_user.go @@ -148,16 +148,14 @@ func pathUserRollback(ctx context.Context, req *logical.Request, _kind string, d // minimum age is 5 minutes, and AWS eventual consistency is, in // practice, never that long), but it could happen if a lease holder // asks immediately after getting a user to revoke the lease, causing - // Vault to leake the secret, which would be a Very Bad Thing to allow. + // Vault to leak the secret, which would be a Very Bad Thing to allow. // So we make sure that, if there's an associated lease, it must be at // least 5 minutes old as well. if aerr, ok := err.(awserr.Error); ok { acceptMissingIamUsers := false - if req.Secret == nil { + if req.Secret == nil || time.Since(req.Secret.IssueTime) > time.Duration(minAwsUserRollbackAge) { // WAL rollback acceptMissingIamUsers = true - } else if time.Since(req.Secret.IssueTime) > time.Duration(5*time.Minute) { - acceptMissingIamUsers = true } if aerr.Code() == iam.ErrCodeNoSuchEntityException && acceptMissingIamUsers { return nil