diff --git a/command/agent/auth/auth.go b/command/agent/auth/auth.go index 0a14d8761db9..0a291b79a1e9 100644 --- a/command/agent/auth/auth.go +++ b/command/agent/auth/auth.go @@ -7,6 +7,7 @@ import ( hclog "github.com/hashicorp/go-hclog" "github.com/hashicorp/vault/api" + "github.com/hashicorp/vault/command/agent" "github.com/hashicorp/vault/sdk/helper/jsonutil" ) @@ -99,7 +100,7 @@ func (ah *AuthHandler) Run(ctx context.Context, am AuthMethod) { credCh = make(chan struct{}) } - var renewer *api.Renewer + var renewer *agent.Renewer for { select { @@ -196,7 +197,7 @@ func (ah *AuthHandler) Run(ctx context.Context, am AuthMethod) { renewer.Stop() } - renewer, err = ah.client.NewRenewer(&api.RenewerInput{ + renewer, err = agent.NewRenewer(ah.client, &agent.RenewerInput{ Secret: secret, }) if err != nil { diff --git a/command/agent/cache/lease_cache.go b/command/agent/cache/lease_cache.go index 6b459b170cbe..3780831b425d 100644 --- a/command/agent/cache/lease_cache.go +++ b/command/agent/cache/lease_cache.go @@ -18,6 +18,7 @@ import ( "github.com/hashicorp/errwrap" hclog "github.com/hashicorp/go-hclog" "github.com/hashicorp/vault/api" + "github.com/hashicorp/vault/command/agent" cachememdb "github.com/hashicorp/vault/command/agent/cache/cachememdb" "github.com/hashicorp/vault/helper/namespace" nshelper "github.com/hashicorp/vault/helper/namespace" @@ -394,7 +395,7 @@ func (c *LeaseCache) startRenewing(ctx context.Context, index *cachememdb.Index, client.SetToken(req.Token) client.SetHeaders(req.Request.Header) - renewer, err := client.NewRenewer(&api.RenewerInput{ + renewer, err := agent.NewRenewer(client, &agent.RenewerInput{ Secret: secret, }) if err != nil { diff --git a/command/agent/renewer.go b/command/agent/renewer.go new file mode 100644 index 000000000000..836bc61d3a95 --- /dev/null +++ b/command/agent/renewer.go @@ -0,0 +1,378 @@ +package agent + +import ( + "errors" + "math/rand" + "sync" + "time" + + "github.com/hashicorp/vault/api" +) + +var ( + ErrRenewerMissingInput = errors.New("missing input to renewer") + ErrRenewerMissingSecret = errors.New("missing secret to renew") + ErrRenewerNotRenewable = errors.New("secret is not renewable") + ErrRenewerNoSecretData = errors.New("returned empty secret data") + + // DefaultRenewerRenewBuffer is the default size of the buffer for renew + // messages on the channel. + DefaultRenewerRenewBuffer = 5 +) + +// Renewer is a process for renewing a secret. +// +// renewer, err := client.NewRenewer(&RenewerInput{ +// Secret: mySecret, +// }) +// go renewer.Renew() +// defer renewer.Stop() +// +// for { +// select { +// case err := <-renewer.DoneCh(): +// if err != nil { +// log.Fatal(err) +// } +// +// // Renewal is now over +// case renewal := <-renewer.RenewCh(): +// log.Printf("Successfully renewed: %#v", renewal) +// } +// } +// +// +// The `DoneCh` will return if renewal fails or if the remaining lease duration +// after a renewal is less than or equal to the grace (in number of seconds). +// In both cases, the caller should attempt a re-read of the secret or +// reauthenticate to get a new token. Clients should check the return value of +// the channel to see if renewal was successful. +type Renewer struct { + l sync.Mutex + + client *api.Client + secret *api.Secret + grace time.Duration + random *rand.Rand + increment int + doneCh chan error + renewCh chan *renewOutput + + stopped bool + stopCh chan struct{} +} + +// RenewerInput is used as input to the renew function. +type RenewerInput struct { + // Secret is the secret to renew + Secret *api.Secret + + // DEPRECATED: this does not do anything. + Grace time.Duration + + // Rand is the randomizer to use for underlying randomization. If not + // provided, one will be generated and seeded automatically. If provided, it + // is assumed to have already been seeded. + Rand *rand.Rand + + // RenewBuffer is the size of the buffered channel where renew messages are + // dispatched. + RenewBuffer int + + // The new TTL, in seconds, that should be set on the lease. The TTL set + // here may or may not be honored by the vault server, based on Vault + // configuration or any associated max TTL values. + Increment int +} + +// renewOutput is the metadata returned to the client (if it's listening) to +// renew messages. +type renewOutput struct { + // RenewedAt is the timestamp when the renewal took place (UTC). + RenewedAt time.Time + + // Secret is the underlying renewal data. It's the same struct as all data + // that is returned from Vault, but since this is renewal data, it will not + // usually include the secret itself. + Secret *api.Secret +} + +// NewRenewer creates a new Renewer from the given input. +func NewRenewer(c *api.Client, i *RenewerInput) (*Renewer, error) { + if i == nil { + return nil, ErrRenewerMissingInput + } + + secret := i.Secret + if secret == nil { + return nil, ErrRenewerMissingSecret + } + + random := i.Rand + if random == nil { + random = rand.New(rand.NewSource(int64(time.Now().Nanosecond()))) + } + + renewBuffer := i.RenewBuffer + if renewBuffer == 0 { + renewBuffer = DefaultRenewerRenewBuffer + } + + return &Renewer{ + client: c, + secret: secret, + increment: i.Increment, + random: random, + doneCh: make(chan error, 1), + renewCh: make(chan *renewOutput, renewBuffer), + + stopped: false, + stopCh: make(chan struct{}), + }, nil +} + +// DoneCh returns the channel where the Renewer will publish when renewal stops. +// If there is an error, this will be an error. +func (r *Renewer) DoneCh() <-chan error { + return r.doneCh +} + +// RenewCh is a channel that receives a message when a successful renewal takes +// place and includes metadata about the renewal. +func (r *Renewer) RenewCh() <-chan *renewOutput { + return r.renewCh +} + +// Stop stops the Renewer. +func (r *Renewer) Stop() { + r.l.Lock() + if !r.stopped { + close(r.stopCh) + r.stopped = true + } + r.l.Unlock() +} + +// Renew starts a background process for renewing this secret. When the secret +// has auth data, this attempts to renew the auth (token). When the secret has +// a lease, this attempts to renew the lease. +func (r *Renewer) Renew() { + var result error + if r.secret.Auth != nil { + result = r.renewAuth() + } else { + result = r.renewLease() + } + + r.doneCh <- result +} + +// renewAuth is a helper for renewing authentication. +func (r *Renewer) renewAuth() error { + if r.secret.Auth.ClientToken == "" { + return ErrRenewerNotRenewable + } + + nonRenewable := !r.secret.Auth.Renewable + + initialTime := time.Now() + priorDuration := time.Duration(r.secret.Auth.LeaseDuration) * time.Second + r.calculateGrace(priorDuration) + + client, token := r.client, r.secret.Auth.ClientToken + + for { + // Check if we are stopped. + select { + case <-r.stopCh: + return nil + default: + } + + var leaseDuration time.Duration + + switch { + case nonRenewable: + // Can't renew, just keep same expiration so we exit when it's + // reauthentication time + leaseDuration = initialTime.Add(priorDuration).Sub(time.Now()) + + // Push a message that a renewal took place. + default: + renewal, err := client.Auth().Token().RenewTokenAsSelf(token, r.increment) + if err != nil { + return err + } + + // Push a message that a renewal took place. + select { + case r.renewCh <- &renewOutput{time.Now().UTC(), renewal}: + default: + } + + // Somehow, sometimes, this happens. + if renewal == nil || renewal.Auth == nil { + return ErrRenewerNoSecretData + } + + // Do nothing if we are not renewable + if !renewal.Auth.Renewable { + return ErrRenewerNotRenewable + } + + // Grab the lease duration + leaseDuration = time.Duration(renewal.Auth.LeaseDuration) * time.Second + } + + // We keep evaluating a new grace period so long as the lease is + // extending. Once it stops extending, we've hit the max and need to + // rely on the grace duration. + if leaseDuration > priorDuration { + r.calculateGrace(leaseDuration) + } + priorDuration = leaseDuration + + // The sleep duration is set to 2/3 of the current lease duration plus + // 1/3 of the current grace period, which adds jitter. + sleepDuration := time.Duration(float64(leaseDuration.Nanoseconds())*2/3 + float64(r.grace.Nanoseconds())/3) + + // If we are within grace, return now; or, if the amount of time we + // would sleep would land us in the grace period. This helps with short + // tokens; for example, you don't want a current lease duration of 4 + // seconds, a grace period of 3 seconds, and end up sleeping for more + // than three of those seconds and having a very small budget of time + // to renew. + if leaseDuration <= r.grace || leaseDuration-sleepDuration <= r.grace { + return nil + } + + select { + case <-r.stopCh: + return nil + case <-time.After(sleepDuration): + continue + } + } +} + +// renewLease is a helper for renewing a lease. +func (r *Renewer) renewLease() error { + if r.secret.LeaseID == "" { + return ErrRenewerNotRenewable + } + + nonRenewable := !r.secret.Renewable + + initialTime := time.Now() + priorDuration := time.Duration(r.secret.LeaseDuration) * time.Second + r.calculateGrace(priorDuration) + + client, leaseID := r.client, r.secret.LeaseID + + for { + // Check if we are stopped. + select { + case <-r.stopCh: + return nil + default: + } + + var leaseDuration time.Duration + + switch { + case nonRenewable: + // Can't renew, just keep same expiration so we exit when it's + // reread time + leaseDuration = initialTime.Add(priorDuration).Sub(time.Now()) + + default: + // Renew the lease. + renewal, err := client.Sys().Renew(leaseID, r.increment) + if err != nil { + return err + } + + // Push a message that a renewal took place. + select { + case r.renewCh <- &renewOutput{time.Now().UTC(), renewal}: + default: + } + + // Somehow, sometimes, this happens. + if renewal == nil { + return ErrRenewerNoSecretData + } + + // Do nothing if we are not renewable + if !renewal.Renewable { + return ErrRenewerNotRenewable + } + + // Grab the lease duration + leaseDuration = time.Duration(renewal.LeaseDuration) * time.Second + } + + // We keep evaluating a new grace period so long as the lease is + // extending. Once it stops extending, we've hit the max and need to + // rely on the grace duration. + if leaseDuration > priorDuration { + r.calculateGrace(leaseDuration) + } + priorDuration = leaseDuration + + // The sleep duration is set to 2/3 of the current lease duration plus + // 1/3 of the current grace period, which adds jitter. + sleepDuration := time.Duration(float64(leaseDuration.Nanoseconds())*2/3 + float64(r.grace.Nanoseconds())/3) + + // If we are within grace, return now; or, if the amount of time we + // would sleep would land us in the grace period. This helps with short + // tokens; for example, you don't want a current lease duration of 4 + // seconds, a grace period of 3 seconds, and end up sleeping for more + // than three of those seconds and having a very small budget of time + // to renew. + if leaseDuration <= r.grace || leaseDuration-sleepDuration <= r.grace { + return nil + } + + select { + case <-r.stopCh: + return nil + case <-time.After(sleepDuration): + continue + } + } +} + +// sleepDuration calculates the time to sleep given the base lease duration. The +// base is the resulting lease duration. It will be reduced to 1/3 and +// multiplied by a random float between 0.0 and 1.0. This extra randomness +// prevents multiple clients from all trying to renew simultaneously. +func (r *Renewer) sleepDuration(base time.Duration) time.Duration { + sleep := float64(base) + + // Renew at 1/3 the remaining lease. This will give us an opportunity to retry + // at least one more time should the first renewal fail. + sleep = sleep / 3.0 + + // Use a randomness so many clients do not hit Vault simultaneously. + sleep = sleep * (r.random.Float64() + 1) / 2.0 + + return time.Duration(sleep) +} + +// calculateGrace calculates the grace period based on a reasonable set of +// assumptions given the total lease time; it also adds some jitter to not have +// clients be in sync. +func (r *Renewer) calculateGrace(leaseDuration time.Duration) { + if leaseDuration == 0 { + r.grace = 0 + return + } + + leaseNanos := float64(leaseDuration.Nanoseconds()) + jitterMax := 0.1 * leaseNanos + + // For a given lease duration, we want to allow 80-90% of that to elapse, + // so the remaining amount is the grace period + r.grace = time.Duration(jitterMax) + time.Duration(uint64(r.random.Int63())%uint64(jitterMax)) +} diff --git a/vault/seal/transit/transit_client.go b/vault/seal/transit/transit_client.go index b9d6c1f12d95..48066ba6a61f 100644 --- a/vault/seal/transit/transit_client.go +++ b/vault/seal/transit/transit_client.go @@ -2,7 +2,6 @@ package transit import ( "encoding/base64" - "errors" "fmt" "os" "path" @@ -113,7 +112,7 @@ func newTransitClient(logger log.Logger, config map[string]string) (*transitClie apiClient.SetNamespace(namespace) } if apiClient.Token() == "" { - return nil, nil, errors.New("missing token") + logger.Info("no token provided to transit auto-seal") } client := &transitClient{ @@ -122,7 +121,7 @@ func newTransitClient(logger log.Logger, config map[string]string) (*transitClie keyName: keyName, } - if !disableRenewal { + if !disableRenewal && apiClient.Token() != "" { // Renew the token immediately to get a secret to pass to renewer secret, err := apiClient.Auth().Token().RenewTokenAsSelf(apiClient.Token(), 0) // If we don't get an error renewing, set up a renewer. The token may not be renewable or not have