Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

agent/cache: Store leases in-order in persistent cache so that restore respects dependencies #12843

Merged
merged 11 commits into from
Oct 27, 2021
3 changes: 3 additions & 0 deletions changelog/12843.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
agent/cache: Process persistent cache leases in dependency order during restore to ensure child leases are always correctly restored
```
184 changes: 139 additions & 45 deletions command/agent/cache/cacheboltdb/bolt.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cacheboltdb

import (
"context"
"encoding/binary"
"fmt"
"os"
"path/filepath"
Expand All @@ -17,7 +18,7 @@ import (
const (
// Keep track of schema version for future migrations
storageVersionKey = "version"
storageVersion = "1"
storageVersion = "2" // v2 merges auth-lease and secret-lease buckets into one ordered bucket

// DatabaseFileName - filename for the persistent cache file
DatabaseFileName = "vault-agent-cache.db"
Expand All @@ -26,15 +27,29 @@ const (
// bootstrapping keys
metaBucketName = "meta"

// SecretLeaseType - Bucket/type for leases with secret info
// DEPRECATED: SecretLeaseType - v1 Bucket/type for leases with secret info
SecretLeaseType = "secret-lease"

// AuthLeaseType - Bucket/type for leases with auth info
// DEPRECATED: AuthLeaseType - v1 Bucket/type for leases with auth info
AuthLeaseType = "auth-lease"

// TokenType - Bucket/type for auto-auth tokens
TokenType = "token"

// LeaseType - v2 Bucket/type for auth AND secret leases.
//
// This bucket stores entries in the order they were created: each entry is
// given an auto-incrementing key, and BoltDB stores keys in byte slice
// order. This means when we iterate through this bucket during
// restore, we will always restore parent tokens before their children,
// allowing us to correctly attach child contexts to their parent's context.
LeaseType = "lease"

// lookupType - v2 Bucket/type to map from a memcachedb index ID to an
// auto-incrementing BoltDB key. Facilitates deletes from the lease
// bucket using an ID instead of the auto-incrementing BoltDB key.
lookupType = "lookup"
tomhjp marked this conversation as resolved.
Show resolved Hide resolved

// AutoAuthToken - key for the latest auto-auth token
AutoAuthToken = "auto-auth-token"

Expand Down Expand Up @@ -99,25 +114,88 @@ func createBoltSchema(tx *bolt.Tx) error {
if err != nil {
return fmt.Errorf("failed to set storage version: %w", err)
}
case string(version) != storageVersion:

return createV2BoltSchema(tx)

case string(version) == storageVersion:
return createV2BoltSchema(tx)

case string(version) == "1":
return migrateFromV1ToV2Schema(tx)

default:
return fmt.Errorf("storage migration from %s to %s not implemented", string(version), storageVersion)
}
}

func createV2BoltSchema(tx *bolt.Tx) error {
// create the buckets for tokens and leases
_, err = tx.CreateBucketIfNotExists([]byte(TokenType))
if err != nil {
return fmt.Errorf("failed to create token bucket: %w", err)
for _, bucket := range []string{TokenType, LeaseType, lookupType} {
if _, err := tx.CreateBucketIfNotExists([]byte(bucket)); err != nil {
return fmt.Errorf("failed to create token bucket: %w", err)
tomhjp marked this conversation as resolved.
Show resolved Hide resolved
}
}

return nil
}

func migrateFromV1ToV2Schema(tx *bolt.Tx) error {
if err := createV2BoltSchema(tx); err != nil {
return err
}

for _, leaseType := range []string{AuthLeaseType, SecretLeaseType} {
if bucket := tx.Bucket([]byte(leaseType)); bucket != nil {
bucket.ForEach(func(key, value []byte) error {
autoIncKey, err := autoIncrementedLeaseKey(tx, string(key))
if err != nil {
return fmt.Errorf("error migrating %s %q key to auto incremented key: %w", leaseType, string(key), err)
}
if err := tx.Bucket([]byte(LeaseType)).Put(autoIncKey, value); err != nil {
return fmt.Errorf("error migrating %s %q from v1 to v2 schema: %w", leaseType, string(key), err)
}
return nil
})

if err := tx.DeleteBucket([]byte(leaseType)); err != nil {
return fmt.Errorf("failed to clean up %s bucket during v1 to v2 schema migration: %w", leaseType, err)
}
}
}

if err := tx.Bucket([]byte(metaBucketName)).Put([]byte(storageVersionKey), []byte(storageVersion)); err != nil {
return fmt.Errorf("failed to update schema from v1 to v2: %w", err)
}
_, err = tx.CreateBucketIfNotExists([]byte(AuthLeaseType))

return nil
}

func autoIncrementedLeaseKey(tx *bolt.Tx, id string) ([]byte, error) {
leaseBucket := tx.Bucket([]byte(LeaseType))
keyValue, err := leaseBucket.NextSequence()
if err != nil {
return fmt.Errorf("failed to create auth lease bucket: %w", err)
return nil, fmt.Errorf("failed to generate lookup key for id %q: %w", id, err)
}
_, err = tx.CreateBucketIfNotExists([]byte(SecretLeaseType))

key := make([]byte, 8)
// MUST be big endian, because keys are ordered by byte slice comparison
// which progressively compares each byte in the slice starting at index 0.
// BigEndian in the range [255-257] looks like this:
// [0 0 0 0 0 0 0 255]
// [0 0 0 0 0 0 1 0]
// [0 0 0 0 0 0 1 1]
// LittleEndian in the same range looks like this:
// [255 0 0 0 0 0 0 0]
// [0 1 0 0 0 0 0 0]
// [1 1 0 0 0 0 0 0]
binary.BigEndian.PutUint64(key, keyValue)

err = tx.Bucket([]byte(lookupType)).Put([]byte(id), key)
if err != nil {
return fmt.Errorf("failed to create secret lease bucket: %w", err)
return nil, err
}

return nil
return key, nil
}

// Set an index (token or lease) in bolt storage
Expand All @@ -133,44 +211,56 @@ func (b *BoltStorage) Set(ctx context.Context, id string, plaintext []byte, inde
}

return b.db.Update(func(tx *bolt.Tx) error {
s := tx.Bucket([]byte(indexType))
if s == nil {
return fmt.Errorf("bucket %q not found", indexType)
}
// If this is an auto-auth token, also stash it in the meta bucket for
// easy retrieval upon restore
if indexType == TokenType {
var key []byte
switch indexType {
case LeaseType:
// If this is a lease type, generate an auto-incrementing key and
// store an ID -> key lookup entry
key, err = autoIncrementedLeaseKey(tx, id)
if err != nil {
return err
}
case TokenType:
// If this is an auto-auth token, also stash it in the meta bucket for
// easy retrieval upon restore
key = []byte(id)
meta := tx.Bucket([]byte(metaBucketName))
if err := meta.Put([]byte(AutoAuthToken), protoBlob); err != nil {
return fmt.Errorf("failed to set latest auto-auth token: %w", err)
}
default:
return fmt.Errorf("called Set for unsupported type %q", indexType)
}
return s.Put([]byte(id), protoBlob)
})
}

func getBucketIDs(b *bolt.Bucket) ([][]byte, error) {
ids := [][]byte{}
err := b.ForEach(func(k, v []byte) error {
ids = append(ids, k)
return nil
s := tx.Bucket([]byte(indexType))
if s == nil {
return fmt.Errorf("bucket %q not found", indexType)
}
return s.Put(key, protoBlob)
})
return ids, err
}

// Delete an index (token or lease) by id from bolt storage
func (b *BoltStorage) Delete(id string) error {
// Delete an index (token or lease) by id and index type from bolt storage
func (b *BoltStorage) Delete(id string, indexType string) error {
return b.db.Update(func(tx *bolt.Tx) error {
// Since Delete returns a nil error if the key doesn't exist, just call
// delete in all three index buckets without checking existence first
if err := tx.Bucket([]byte(TokenType)).Delete([]byte(id)); err != nil {
return fmt.Errorf("failed to delete %q from token bucket: %w", id, err)
key := []byte(id)
if indexType == LeaseType {
key = tx.Bucket([]byte(lookupType)).Get(key)
if key == nil {
return fmt.Errorf("failed to lookup bolt DB key for id %q", id)
}

err := tx.Bucket([]byte(lookupType)).Delete([]byte(id))
if err != nil {
return fmt.Errorf("failed to delete %q from lookup bucket: %w", id, err)
}
}
if err := tx.Bucket([]byte(AuthLeaseType)).Delete([]byte(id)); err != nil {
return fmt.Errorf("failed to delete %q from auth lease bucket: %w", id, err)

bucket := tx.Bucket([]byte(indexType))
if bucket == nil {
return fmt.Errorf("bucket %q not found during delete", indexType)
}
if err := tx.Bucket([]byte(SecretLeaseType)).Delete([]byte(id)); err != nil {
return fmt.Errorf("failed to delete %q from secret lease bucket: %w", id, err)
if err := bucket.Delete(key); err != nil {
return fmt.Errorf("failed to delete %q from %q bucket: %w", id, indexType, err)
}
b.logger.Trace("deleted index from bolt db", "id", id)
return nil
Expand All @@ -193,10 +283,14 @@ func (b *BoltStorage) GetByType(ctx context.Context, indexType string) ([][]byte
err := b.db.View(func(tx *bolt.Tx) error {
var errors *multierror.Error

tx.Bucket([]byte(indexType)).ForEach(func(id, ciphertext []byte) error {
bucket := tx.Bucket([]byte(indexType))
if bucket == nil {
return fmt.Errorf("bucket %q not found", indexType)
}
bucket.ForEach(func(key, ciphertext []byte) error {
plaintext, err := b.decrypt(ctx, ciphertext)
if err != nil {
errors = multierror.Append(errors, fmt.Errorf("error decrypting index id %s: %w", id, err))
errors = multierror.Append(errors, fmt.Errorf("error decrypting entry %s: %w", string(key), err))
tomhjp marked this conversation as resolved.
Show resolved Hide resolved
return nil
}

Expand Down Expand Up @@ -247,11 +341,11 @@ func (b *BoltStorage) GetRetrievalToken() ([]byte, error) {
var token []byte

err := b.db.View(func(tx *bolt.Tx) error {
keyBucket := tx.Bucket([]byte(metaBucketName))
if keyBucket == nil {
metaBucket := tx.Bucket([]byte(metaBucketName))
if metaBucket == nil {
return fmt.Errorf("bucket %q not found", metaBucketName)
}
value := keyBucket.Get([]byte(RetrievalTokenMaterial))
value := metaBucket.Get([]byte(RetrievalTokenMaterial))
if value != nil {
token = make([]byte, len(value))
copy(token, value)
Expand Down Expand Up @@ -286,7 +380,7 @@ func (b *BoltStorage) Close() error {
// the schema/layout
func (b *BoltStorage) Clear() error {
return b.db.Update(func(tx *bolt.Tx) error {
for _, name := range []string{AuthLeaseType, SecretLeaseType, TokenType} {
for _, name := range []string{TokenType, LeaseType, lookupType} {
b.logger.Trace("deleting bolt bucket", "name", name)
if err := tx.DeleteBucket([]byte(name)); err != nil {
return err
Expand Down
36 changes: 17 additions & 19 deletions command/agent/cache/cacheboltdb/bolt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ func TestBolt_SetGet(t *testing.T) {
})
require.NoError(t, err)

secrets, err := b.GetByType(ctx, SecretLeaseType)
secrets, err := b.GetByType(ctx, LeaseType)
assert.NoError(t, err)
require.Len(t, secrets, 0)

err = b.Set(ctx, "test1", []byte("hello"), SecretLeaseType)
err = b.Set(ctx, "test1", []byte("hello"), LeaseType)
assert.NoError(t, err)
secrets, err = b.GetByType(ctx, SecretLeaseType)
secrets, err = b.GetByType(ctx, LeaseType)
assert.NoError(t, err)
require.Len(t, secrets, 1)
assert.Equal(t, []byte("hello"), secrets[0])
Expand All @@ -62,19 +62,19 @@ func TestBoltDelete(t *testing.T) {
})
require.NoError(t, err)

err = b.Set(ctx, "secret-test1", []byte("hello1"), SecretLeaseType)
err = b.Set(ctx, "secret-test1", []byte("hello1"), LeaseType)
require.NoError(t, err)
err = b.Set(ctx, "secret-test2", []byte("hello2"), SecretLeaseType)
err = b.Set(ctx, "secret-test2", []byte("hello2"), LeaseType)
require.NoError(t, err)

secrets, err := b.GetByType(ctx, SecretLeaseType)
secrets, err := b.GetByType(ctx, LeaseType)
require.NoError(t, err)
assert.Len(t, secrets, 2)
assert.ElementsMatch(t, [][]byte{[]byte("hello1"), []byte("hello2")}, secrets)

err = b.Delete("secret-test1")
err = b.Delete("secret-test1", LeaseType)
require.NoError(t, err)
secrets, err = b.GetByType(ctx, SecretLeaseType)
secrets, err = b.GetByType(ctx, LeaseType)
require.NoError(t, err)
require.Len(t, secrets, 1)
assert.Equal(t, []byte("hello2"), secrets[0])
Expand All @@ -95,19 +95,20 @@ func TestBoltClear(t *testing.T) {
require.NoError(t, err)

// Populate the bolt db
err = b.Set(ctx, "secret-test1", []byte("hello"), SecretLeaseType)
err = b.Set(ctx, "secret-test1", []byte("hello1"), LeaseType)
require.NoError(t, err)
secrets, err := b.GetByType(ctx, SecretLeaseType)
secrets, err := b.GetByType(ctx, LeaseType)
require.NoError(t, err)
require.Len(t, secrets, 1)
assert.Equal(t, []byte("hello"), secrets[0])
assert.Equal(t, []byte("hello1"), secrets[0])

err = b.Set(ctx, "auth-test1", []byte("hello"), AuthLeaseType)
err = b.Set(ctx, "auth-test1", []byte("hello2"), LeaseType)
require.NoError(t, err)
auths, err := b.GetByType(ctx, AuthLeaseType)
auths, err := b.GetByType(ctx, LeaseType)
require.NoError(t, err)
require.Len(t, auths, 1)
assert.Equal(t, []byte("hello"), auths[0])
require.Len(t, auths, 2)
assert.Equal(t, []byte("hello1"), auths[0])
assert.Equal(t, []byte("hello2"), auths[1])

err = b.Set(ctx, "token-test1", []byte("hello"), TokenType)
require.NoError(t, err)
Expand All @@ -119,10 +120,7 @@ func TestBoltClear(t *testing.T) {
// Clear the bolt db, and check that it's indeed clear
err = b.Clear()
require.NoError(t, err)
secrets, err = b.GetByType(ctx, SecretLeaseType)
require.NoError(t, err)
assert.Len(t, secrets, 0)
auths, err = b.GetByType(ctx, AuthLeaseType)
auths, err = b.GetByType(ctx, LeaseType)
require.NoError(t, err)
assert.Len(t, auths, 0)
tokens, err = b.GetByType(ctx, TokenType)
Expand Down
Loading