Backport of VAULT-23926: Fix rollback deadlock on perf secondaries into release/1.16.x #25554

Merged
3 changes: 3 additions & 0 deletions changelog/25448.txt
@@ -0,0 +1,3 @@
```release-note:bug
core (enterprise): Fix a deadlock that can occur on performance secondary clusters when there are many mounts and a mount is deleted or filtered
```
102 changes: 75 additions & 27 deletions vault/rollback.go
@@ -25,6 +25,8 @@ const (
RollbackWorkersEnvVar = "VAULT_ROLLBACK_WORKERS"
)

var rollbackCanceled = errors.New("rollback attempt canceled")

// RollbackManager is responsible for performing rollbacks of partial
// secrets within logical backends.
//
@@ -74,7 +76,9 @@ type rollbackState struct {
cancelLockGrabCtxCancel context.CancelFunc
// scheduled is the time that this job was created and submitted to the
// rollbackRunner
scheduled time.Time
scheduled time.Time
isRunning chan struct{}
isCanceled chan struct{}
}

// NewRollbackManager is used to create a new rollback manager
@@ -192,29 +196,45 @@ func (m *RollbackManager) triggerRollbacks() {
}
}

// startOrLookupRollback is used to start an async rollback attempt.
// This must be called with the inflightLock held.
func (m *RollbackManager) startOrLookupRollback(ctx context.Context, fullPath string, grabStatelock bool) *rollbackState {
m.inflightLock.Lock()
defer m.inflightLock.Unlock()
// lookupRollbackLocked checks if there's an inflight rollback with the given
// path. Callers must have the inflightLock. The function also reports metrics,
// since it is regularly called as part of the scheduled rollbacks.
func (m *RollbackManager) lookupRollbackLocked(fullPath string) *rollbackState {
defer metrics.SetGauge([]string{"rollback", "queued"}, float32(m.runner.WaitingQueueSize()))
defer metrics.SetGauge([]string{"rollback", "inflight"}, float32(len(m.inflight)))
rsInflight, ok := m.inflight[fullPath]
if ok {
return rsInflight
}
rsInflight := m.inflight[fullPath]
return rsInflight
}

// newRollbackLocked creates a new rollback state and adds it to the inflight
// rollback map. Callers must have the inflightLock
func (m *RollbackManager) newRollbackLocked(fullPath string) *rollbackState {
cancelCtx, cancelFunc := context.WithCancel(context.Background())
rs := &rollbackState{
cancelLockGrabCtx: cancelCtx,
cancelLockGrabCtxCancel: cancelFunc,
isRunning: make(chan struct{}),
isCanceled: make(chan struct{}),
scheduled: time.Now(),
}

// If no inflight rollback is already running, kick one off
m.inflight[fullPath] = rs
rs.Add(1)
m.inflightAll.Add(1)
rs.scheduled = time.Now()
return rs
}

// startOrLookupRollback is used to start an async rollback attempt.
func (m *RollbackManager) startOrLookupRollback(ctx context.Context, fullPath string, grabStatelock bool) *rollbackState {
m.inflightLock.Lock()
defer m.inflightLock.Unlock()
rs := m.lookupRollbackLocked(fullPath)
if rs != nil {
return rs
}

// If no inflight rollback is already running, kick one off
rs = m.newRollbackLocked(fullPath)

select {
case <-m.doneCh:
// if we've already shut down, then don't submit the task to avoid a panic
@@ -244,18 +264,27 @@ func (m *RollbackManager) finishRollback(rs *rollbackState, err error, fullPath
m.inflightLock.Lock()
defer m.inflightLock.Unlock()
}
delete(m.inflight, fullPath)
if _, ok := m.inflight[fullPath]; ok {
delete(m.inflight, fullPath)
}
}

// attemptRollback invokes a RollbackOperation for the given path
func (m *RollbackManager) attemptRollback(ctx context.Context, fullPath string, rs *rollbackState, grabStatelock bool) (err error) {
close(rs.isRunning)
defer m.finishRollback(rs, err, fullPath, true)
select {
case <-rs.isCanceled:
return rollbackCanceled
default:
}

metrics.MeasureSince([]string{"rollback", "waiting"}, rs.scheduled)
metricName := []string{"rollback", "attempt"}
if m.rollbackMetricsMountName {
metricName = append(metricName, strings.ReplaceAll(fullPath, "/", "-"))
}
defer metrics.MeasureSince(metricName, time.Now())
defer m.finishRollback(rs, err, fullPath, true)

ns, err := namespace.FromContext(ctx)
if err != nil {
@@ -286,6 +315,7 @@ func (m *RollbackManager) attemptRollback(ctx context.Context, fullPath string,
case <-m.shutdownCh:
case <-rs.cancelLockGrabCtx.Done():
case <-doneCh:
case <-rs.isCanceled:
}
}()

@@ -298,6 +328,8 @@ func (m *RollbackManager) attemptRollback(ctx context.Context, fullPath string,
select {
case <-m.shutdownCh:
return errors.New("rollback shutting down")
case <-rs.isCanceled:
return rollbackCanceled
default:
releaseLock = false
}
@@ -339,21 +371,37 @@ func (m *RollbackManager) Rollback(ctx context.Context, path string) error {
}
fullPath := ns.Path + path

// Check for an existing attempt or start one if none
rs := m.startOrLookupRollback(ctx, fullPath, false)
m.inflightLock.Lock()
rs := m.lookupRollbackLocked(fullPath)
if rs != nil {
// Since we have the statelock held, tell any inflight rollback to give up
// trying to acquire it. This will prevent deadlocks in the case where we
// have the write lock. In the case where it was waiting to grab
// a read lock it will then simply continue with the rollback
// operation under the protection of our write lock.
rs.cancelLockGrabCtxCancel()

select {
case <-rs.isRunning:
// if the rollback has started then we should wait for it to complete
m.inflightLock.Unlock()
rs.Wait()
return rs.lastError
default:
}

// if the rollback hasn't started and there's no capacity, we could
// end up deadlocking. Cancel the existing rollback and start a new
// one.
close(rs.isCanceled)
}
rs = m.newRollbackLocked(fullPath)
m.inflightLock.Unlock()

// Since we have the statelock held, tell any inflight rollback to give up
// trying to acquire it. This will prevent deadlocks in the case where we
// have the write lock. In the case where it was waiting to grab
// a read lock it will then simply continue with the rollback
// operation under the protection of our write lock.
rs.cancelLockGrabCtxCancel()
// we can ignore the error, since it's going to be set in rs.lastError
m.attemptRollback(ctx, fullPath, rs, false)

// It's safe to do this, since the other thread either already has the lock
// held, or we just canceled it above.
rs.Wait()

// Return the last error
return rs.lastError
}

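For reviewers who want the shape of the fix without reading the full diff, here is a minimal standalone sketch of the `isRunning`/`isCanceled` handshake that `rollback.go` now uses. The names and types below are simplified illustrations of the pattern, not Vault's actual implementation:

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

var errCanceled = errors.New("attempt canceled")

type task struct {
	sync.WaitGroup
	isRunning  chan struct{}
	isCanceled chan struct{}
	lastError  error
}

func newTask() *task {
	t := &task{
		isRunning:  make(chan struct{}),
		isCanceled: make(chan struct{}),
	}
	t.Add(1)
	return t
}

// run is what a pool worker would execute for a queued attempt.
func (t *task) run(work func() error) {
	defer t.Done()
	close(t.isRunning) // signal that a worker has picked the task up
	select {
	case <-t.isCanceled:
		t.lastError = errCanceled // the caller gave up on this queued copy
		return
	default:
	}
	t.lastError = work()
}

// waitOrReplace mirrors the shape of RollbackManager.Rollback: if the queued
// task has already started, wait for it; otherwise cancel it and run the work
// inline so the caller never blocks on a worker that may never become free.
func waitOrReplace(t *task, work func() error) error {
	select {
	case <-t.isRunning:
		t.Wait()
		return t.lastError
	default:
		close(t.isCanceled) // the queued copy will see this and bail out
	}
	return work()
}

func main() {
	t := newTask()
	go t.run(func() error { return nil }) // a worker may or may not start first
	fmt.Println(waitOrReplace(t, func() error { return nil }))
}
```
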
2 changes: 2 additions & 0 deletions website/content/docs/release-notes/1.13.0.mdx
@@ -128,6 +128,8 @@ The fix for this UI issue is coming in the Vault 1.13.1 release.

@include 'known-issues/expiration-metrics-fatal-error.mdx'

@include 'known-issues/perf-secondary-many-mounts-deadlock.mdx'

## Feature deprecations and EOL

Please refer to the [Deprecation Plans and Notice](/vault/docs/deprecation) page
1 change: 1 addition & 0 deletions website/content/docs/release-notes/1.14.0.mdx
@@ -24,6 +24,7 @@ All | [API calls to update-primary may lead to data loss](/vault/docs/upgrad
1.14.0+ | [Sublogger levels not adjusted on reload](/vault/docs/upgrading/upgrade-to-1.14.x#sublogger-levels-unchanged-on-reload)
1.14.5 | [Fatal error during expiration metrics gathering causing Vault crash](/vault/docs/upgrading/upgrade-to-1.15.x#fatal-error-during-expiration-metrics-gathering-causing-vault-crash)
1.14.5 | [User lockout potential double logging](/vault/docs/upgrading/upgrade-to-1.14.x#user-lockout-logging)
1.14.5 - 1.14.9 | [Deadlock can occur on performance secondary clusters with many mounts](/vault/docs/upgrading/upgrade-to-1.14.x#deadlock-can-occur-on-performance-secondary-clusters-with-many-mounts)

## Vault companion updates

2 changes: 1 addition & 1 deletion website/content/docs/release-notes/1.15.0.mdx
@@ -25,7 +25,7 @@ description: |-
| 1.15.0+ | [URL change for KV v2 plugin](/vault/docs/upgrading/upgrade-to-1.15.x#kv2-url-change) |
| 1.15.1 | [Fatal error during expiration metrics gathering causing Vault crash](/vault/docs/upgrading/upgrade-to-1.15.x#fatal-error-during-expiration-metrics-gathering-causing-vault-crash) |
| 1.15.0 - 1.15.4 | [Audit devices could log raw data despite configuration](/vault/docs/upgrading/upgrade-to-1.15.x#audit-devices-could-log-raw-data-despite-configuration) |

| 1.15.0 - 1.15.5 | [Deadlock can occur on performance secondary clusters with many mounts](/vault/docs/upgrading/upgrade-to-1.15.x#deadlock-can-occur-on-performance-secondary-clusters-with-many-mounts) |

## Vault companion updates

2 changes: 2 additions & 0 deletions website/content/docs/upgrading/upgrade-to-1.13.x.mdx
@@ -194,3 +194,5 @@ Affects Vault 1.13.0+
@include 'known-issues/sublogger-levels-unchanged-on-reload.mdx'

@include 'known-issues/expiration-metrics-fatal-error.mdx'

@include 'known-issues/perf-secondary-many-mounts-deadlock.mdx'
2 changes: 2 additions & 0 deletions website/content/docs/upgrading/upgrade-to-1.14.x.mdx
@@ -62,3 +62,5 @@ is measuring cumulative time writing, and not the distribution of individual wri
@include 'known-issues/sublogger-levels-unchanged-on-reload.mdx'

@include 'known-issues/expiration-metrics-fatal-error.mdx'

@include 'known-issues/perf-secondary-many-mounts-deadlock.mdx'
2 changes: 2 additions & 0 deletions website/content/docs/upgrading/upgrade-to-1.15.x.mdx
@@ -68,3 +68,5 @@ option.
@include 'known-issues/expiration-metrics-fatal-error.mdx'

@include 'known-issues/1_15_audit-use-of-log-raw-applies-to-all-devices.mdx'

@include 'known-issues/perf-secondary-many-mounts-deadlock.mdx'
@@ -0,0 +1,34 @@
### Deadlock can occur on performance secondary clusters with many mounts

#### Affected versions

- 1.15.0 - 1.15.5
- 1.14.5 - 1.14.9
- 1.13.9 - 1.13.13

#### Issue

Vault 1.15.0, 1.14.5, and 1.13.9 introduced a worker pool to schedule periodic
rollback operations on all mounts. This worker pool defaulted to using 256
workers. The worker pool introduced a risk of deadlocking on the active node of
**performance secondary clusters**, leaving that cluster unable to service any
requests.
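
To make the failure mode concrete, the sketch below (illustrative Go, not Vault's implementation) shows how a fixed-size worker pool combined with a shared read/write lock can deadlock once the lock holder depends on a queued job. The workaround below sidesteps this by making the pool larger than the number of mounts that can queue rollbacks at once.

```go
// Illustrative only (not Vault code): a bounded worker pool plus a shared
// sync.RWMutex can wedge when the writer holds the lock and waits for a
// queued job while every worker is blocked waiting for a read lock.
package main

import "sync"

func main() {
	var state sync.RWMutex
	jobs := make(chan func(), 64)

	const workers = 2 // stand-in for the 256 rollback workers
	for i := 0; i < workers; i++ {
		go func() {
			for job := range jobs {
				job()
			}
		}()
	}

	state.Lock() // writer, e.g. handling a deleted or filtered mount

	// Occupy every worker with a job that needs a read lock on the state.
	for i := 0; i < workers; i++ {
		jobs <- func() {
			state.RLock() // blocks: the writer above holds the lock
			state.RUnlock()
		}
	}

	// The writer now waits on one more queued job, but no worker is free,
	// and no worker can finish until the writer releases the lock.
	done := make(chan struct{})
	jobs <- func() { close(done) }
	<-done // hangs; the Go runtime reports a deadlock once every goroutine blocks
	state.Unlock()
}
```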

The conditions required to cause the deadlock on the performance secondary:

- Performance replication is enabled
- The performance primary cluster has more than 256 non-local mounts. The more
mounts the cluster has, the more likely the deadlock becomes
- One of the following occurs:
- A replicated mount is unmounted or remounted OR
- A replicated namespace is deleted OR
- Replication paths filters are used to filter at least one mount or namespace

#### Workaround

Set the `VAULT_ROLLBACK_WORKERS` environment variable to a number larger than
the number of mounts in your Vault cluster and restart Vault:

```shell-session
$ export VAULT_ROLLBACK_WORKERS=1000
```