Skip to content

Commit

Permalink
storage: override MVCCMetadata.TxnDidNotUpdateMeta in mixed version c…
Browse files Browse the repository at this point in the history
…luster

Specifically, if the cluster version indicates that there can
be nodes with the broken SingleDelete logic for separated
intent resolution, the MVCCMetadata.TxnDidNotUpdateMeta
field will never be set to true.

See code comments and #69891 (comment)

Informs #69891

Release justification: fix for a release blocker that causes incorrect
behavior for transactional writes.

Release note: None
  • Loading branch information
sumeerbhola committed Sep 15, 2021
1 parent a29fd0a commit 21f4eed
Show file tree
Hide file tree
Showing 16 changed files with 590 additions and 35 deletions.
2 changes: 2 additions & 0 deletions pkg/ccl/storageccl/engineccl/encrypted_fs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/ccl/baseccl"
"github.com/cockroachdb/cockroach/pkg/ccl/storageccl/engineccl/enginepbccl"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
Expand Down Expand Up @@ -247,6 +248,7 @@ func TestPebbleEncryption(t *testing.T) {
StorageConfig: base.StorageConfig{
Attrs: roachpb.Attributes{},
MaxSize: 512 << 20,
Settings: cluster.MakeTestingClusterSettings(),
UseFileRegistry: true,
EncryptionOptions: encOptionsBytes,
},
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/rangefeed/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ go_test(
"//pkg/base",
"//pkg/keys",
"//pkg/roachpb:with-mocks",
"//pkg/settings/cluster",
"//pkg/storage",
"//pkg/storage/enginepb",
"//pkg/testutils",
Expand Down
3 changes: 2 additions & 1 deletion pkg/kv/kvserver/rangefeed/catchup_scan_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/rangefeed"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
Expand Down Expand Up @@ -184,7 +185,7 @@ func setupMVCCPebble(b testing.TB, dir string, lBaseMaxBytes int64, readOnly boo
peb, err := storage.NewPebble(
context.Background(),
storage.PebbleConfig{
StorageConfig: base.StorageConfig{Dir: dir},
StorageConfig: base.StorageConfig{Dir: dir, Settings: cluster.MakeTestingClusterSettings()},
Opts: opts,
})
if err != nil {
Expand Down
11 changes: 8 additions & 3 deletions pkg/kv/kvserver/replica_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,8 @@ func (tc *testContext) StartWithStoreConfigAndVersion(
storage.InMemory(),
storage.Attributes(roachpb.Attributes{Attrs: []string{"dc1", "mem"}}),
storage.MaxSize(1<<20),
storage.SetSeparatedIntents(disableSeparatedIntents))
storage.SetSeparatedIntents(disableSeparatedIntents),
storage.Settings(cfg.Settings))
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -6433,9 +6434,9 @@ func TestRangeStatsComputation(t *testing.T) {
}
expMS = baseStats
expMS.Add(enginepb.MVCCStats{
LiveBytes: 103,
LiveBytes: 101,
KeyBytes: 28,
ValBytes: 75,
ValBytes: 73,
IntentBytes: 23,
LiveCount: 2,
KeyCount: 2,
Expand All @@ -6445,6 +6446,10 @@ func TestRangeStatsComputation(t *testing.T) {
if tc.engine.IsSeparatedIntentsEnabledForTesting(ctx) {
expMS.SeparatedIntentCount++
}
if !tc.engine.OverrideTxnDidNotUpdateMetaToFalse(ctx) {
expMS.LiveBytes += 2
expMS.ValBytes += 2
}
if err := verifyRangeStats(tc.engine, tc.repl.RangeID, expMS); err != nil {
t.Fatal(err)
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/kv/kvserver/spanset/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,10 @@ func (s spanSetWriter) LogLogicalOp(
s.w.LogLogicalOp(op, details)
}

func (s spanSetWriter) OverrideTxnDidNotUpdateMetaToFalse(ctx context.Context) bool {
return s.w.OverrideTxnDidNotUpdateMetaToFalse(ctx)
}

// ReadWriter is used outside of the spanset package internally, in ccl.
type ReadWriter struct {
spanSetReader
Expand Down
25 changes: 25 additions & 0 deletions pkg/storage/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,31 @@ type Writer interface {
//
// It is safe to modify the contents of the arguments after it returns.
SingleClearEngineKey(key EngineKey) error

// OverrideTxnDidNotUpdateMetaToFalse is a temporary method that will be removed
// for 22.1.
//
// See #69891 for details on the bug related to usage of SingleDelete in
// separated intent resolution. The following is needed for correctly
// migrating from 21.1 to 21.2.
//
// We have fixed the intent resolution code path in 21.2-beta to use
// SingleDelete more conservatively. The 21.2-GA will also likely include
// Pebble changes to make the old buggy usage of SingleDelete correct.
// However, there is a problem if someone upgrades from 21.1 to
// 21.2-beta/21.2-GA:
// 21.1 nodes will not write separated intents while they are the
// leaseholder for a range. However they can become the leaseholder for a
// range after a separated intent was written (in a mixed version cluster).
// Hence they can resolve separated intents. The logic in 21.1 for using
// SingleDelete when resolving intents is similarly buggy, and the Pebble
// code included in 21.1 will not make this buggy usage correct. The
// solution is for 21.2 nodes to never set txnDidNotUpdateMeta=true when
// writing separated intents, until the cluster version is at the version
// when the buggy code was fixed in 21.2. So 21.1 code will never use
// SingleDelete when resolving these separated intents (since the only
// separated intents being written are by 21.2 nodes).
OverrideTxnDidNotUpdateMetaToFalse(ctx context.Context) bool
}

// ReadWriter is the read/write interface to an engine's data.
Expand Down
104 changes: 84 additions & 20 deletions pkg/storage/mvcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -1069,8 +1069,7 @@ func (b *putBuffer) putIntentMeta(
ctx context.Context,
writer Writer,
key MVCCKey,
state PrecedingIntentState,
txnDidNotUpdateMeta bool,
helper txnDidNotUpdateMetaHelper,
meta *enginepb.MVCCMetadata,
) (keyBytes, valBytes int64, separatedIntentCountDelta int, err error) {
if meta.Txn != nil && meta.Timestamp.ToTimestamp() != meta.Txn.WriteTimestamp {
Expand All @@ -1079,32 +1078,86 @@ func (b *putBuffer) putIntentMeta(
return 0, 0, 0, errors.AssertionFailedf(
"meta.Timestamp != meta.Txn.WriteTimestamp: %s != %s", meta.Timestamp, meta.Txn.WriteTimestamp)
}
// All nodes in this cluster understand separated intents, so can fiddle
// with TxnDidNotUpdateMeta, which is not understood by older nodes (which
// are no longer present, and will never again be present).
//
// NB: the parameter txnDidNotUpdateMeta is about what happened prior to
// this Put, and is passed through to writer below. The field
// TxnDidNotUpdateMeta, in the MVCCMetadata we are about to write,
// includes what happened in this Put.
if state == NoExistingIntent {
meta.TxnDidNotUpdateMeta = &trueValue
} else {
// Absence represents false.
meta.TxnDidNotUpdateMeta = nil
}

helper.populateMeta(ctx, meta)
bytes, err := b.marshalMeta(meta)
if err != nil {
return 0, 0, 0, err
}
if separatedIntentCountDelta, err = writer.PutIntent(
ctx, key.Key, bytes, state, txnDidNotUpdateMeta, meta.Txn.ID); err != nil {
ctx, key.Key, bytes, helper.state, helper.valueForPutIntent(), meta.Txn.ID); err != nil {
return 0, 0, 0, err
}
return int64(key.EncodedSize()), int64(len(bytes)), separatedIntentCountDelta, nil
}

// txnDidNotUpdateMetaHelper is used to decide what to put in the MVCCMetadata
// proto, and what value to pass in the txnDidNotUpdateMeta parameter of
// PutIntent.
//
// Note that separated intents are understood by v21.1 already, though we
// assume they were never actively written there (the cluster setting defaults
// to false and there are known bugs). Therefore all nodes in a cluster (v21.1
// or v21.2) understand separated intents. This understanding by v21.1
// includes reading and resolving separated intents, and setting
// MVCCMetadata.TxnDidNotUpdateMeta to true. Note that since v21.1 nodes only
// write MVCCMetadata to interleaved intents, they can only set
// MVCCMetadata.TxnDidNotUpdateMeta to true for interleaved intents, which is
// harmless since interleaved intents do not invoke the SingleDelete
// optimization.
//
// However, when migrating from v21.1 to v21.2, and running in a mixed version
// cluster, we need to be careful. A v21.1 node can become the leaseholder for
// a range after a separated intent was written by a v21.2 node. Hence they
// can resolve separated intents. The logic in v21.1 for using SingleDelete
// when resolving intents is similarly buggy, and the Pebble code included in
// v21.1 will not make this buggy usage correct. The solution is for v21.2
// nodes to never set txnDidNotUpdateMeta=true when writing separated intents,
// until the cluster version is at the version when the buggy code was fixed
// in v21.2. So v21.1 code will never use SingleDelete when resolving these
// separated intents (since the only separated intents being written are by
// v21.2 nodes).
//
// Details about this helper:
// - The txnDidNotUpdateMeta field is about what happened prior to this Put,
// and is intended to be passed through to Writer.PutIntent.
// In general it can be true in 2 cases:
// - There was no prior intent.
// - MVCCMetadata.TxnDidNotUpdateMeta is true: v21.2 nodes will never set
// this to true in a mixed version cluster (see next bullet). However,
// v21.1 nodes can set it to true.
// What saves us is that it is only used by intentDemuxWriter when there was
// an existing separated intent that was written once and the writer is
// writing interleaved intents, and the true value enables SingleDelete of
// the existing separated intent. This transition can happen when an intent
// written at a v21.2 node is rewritten on a v21.1 node. So it is irrelevant
// for the first case above. And since v21.2 nodes never set
// MVCCMetadata.TxnDidNotUpdateMeta to true in a mixed version cluster, the
// situation in which this value is used will always be false, which takes
// care of case 2 above.
//
// - The state field describes the preceding intent state. It is intended to
// be used for populating the MVCCMetadata.TxnDidNotUpdateMeta. This is
// where we use Writer.OverrideTxnDidNotUpdateMetaToFalse to override to
// false until there can never be v21.1 nodes.
type txnDidNotUpdateMetaHelper struct {
txnDidNotUpdateMeta bool
state PrecedingIntentState
w Writer
}

func (t txnDidNotUpdateMetaHelper) valueForPutIntent() bool {
return t.txnDidNotUpdateMeta
}

func (t txnDidNotUpdateMetaHelper) populateMeta(ctx context.Context, meta *enginepb.MVCCMetadata) {
if t.state == NoExistingIntent && !t.w.OverrideTxnDidNotUpdateMetaToFalse(ctx) {
meta.TxnDidNotUpdateMeta = &trueValue
} else {
// Absence represents false.
meta.TxnDidNotUpdateMeta = nil
}
}

// MVCCPut sets the value for a specified key. It will save the value
// with different versions according to its timestamp and update the
// key metadata. The timestamp must be passed as a parameter; using
Expand Down Expand Up @@ -1740,7 +1793,12 @@ func mvccPutInternal(
var separatedIntentCountDelta int
if newMeta.Txn != nil {
metaKeySize, metaValSize, separatedIntentCountDelta, err = buf.putIntentMeta(
ctx, writer, metaKey, precedingIntentState, txnDidNotUpdateMeta, newMeta)
ctx, writer, metaKey,
txnDidNotUpdateMetaHelper{
txnDidNotUpdateMeta: txnDidNotUpdateMeta,
state: precedingIntentState,
w: writer,
}, newMeta)
if err != nil {
return err
}
Expand Down Expand Up @@ -3149,7 +3207,13 @@ func mvccResolveWriteIntent(
// to do anything to update the intent but to move the timestamp forward,
// even if it can.
metaKeySize, metaValSize, separatedIntentCountDelta, err = buf.putIntentMeta(
ctx, rw, metaKey, precedingIntentState, canSingleDelHelper.v(), &buf.newMeta)
ctx, rw, metaKey,
txnDidNotUpdateMetaHelper{
txnDidNotUpdateMeta: canSingleDelHelper.v(),
state: precedingIntentState,
w: rw,
},
&buf.newMeta)
} else {
metaKeySize = int64(metaKey.EncodedSize())
separatedIntentCountDelta, err =
Expand Down
27 changes: 26 additions & 1 deletion pkg/storage/mvcc_history_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ import (
"strings"
"testing"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
Expand Down Expand Up @@ -109,7 +111,30 @@ func TestMVCCHistories(t *testing.T) {
"randomly setting enableSeparated: %t", enabledSeparated)
}
// We start from a clean slate in every test file.
engine, err := Open(ctx, InMemory(), CacheSize(1<<20 /* 1 MiB */), SetSeparatedIntents(!enabledSeparated))
engine, err := Open(ctx, InMemory(), CacheSize(1<<20 /* 1 MiB */),
SetSeparatedIntents(!enabledSeparated),
func(cfg *engineConfig) error {
if !overridden {
// Latest cluster version, since these tests are not ones where we
// are examining differences related to separated intents.
cfg.Settings = cluster.MakeTestingClusterSettings()
} else {
if !enabledSeparated {
// 21.1, which has the old code that is unaware about the changes
// we have made for OverrideTxnDidNotUpdateMetaToFalse. By using
// the latest cluster version, we effectively undo these changes.
cfg.Settings = cluster.MakeTestingClusterSettings()
} else if strings.Contains(path, "mixed_cluster") {
v21_1 := clusterversion.ByKey(clusterversion.V21_1)
cfg.Settings =
cluster.MakeTestingClusterSettingsWithVersions(v21_1, v21_1, true)
} else {
// Latest cluster version.
cfg.Settings = cluster.MakeTestingClusterSettings()
}
}
return nil
})
if err != nil {
t.Fatal(err)
}
Expand Down
22 changes: 20 additions & 2 deletions pkg/storage/pebble.go
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,11 @@ func (p *Pebble) PutEngineKey(key EngineKey, value []byte) error {
return p.db.Set(key.Encode(), value, pebble.Sync)
}

// OverrideTxnDidNotUpdateMetaToFalse implements the Engine interface.
func (p *Pebble) OverrideTxnDidNotUpdateMetaToFalse(ctx context.Context) bool {
return overrideTxnDidNotUpdateMetaToFalse(ctx, p.settings)
}

// IsSeparatedIntentsEnabledForTesting implements the Engine interface.
func (p *Pebble) IsSeparatedIntentsEnabledForTesting(ctx context.Context) bool {
return !p.disableSeparatedIntents
Expand Down Expand Up @@ -1265,7 +1270,7 @@ func (p *Pebble) GetAuxiliaryDir() string {
func (p *Pebble) NewBatch() Batch {
return newPebbleBatch(
p.db, p.db.NewIndexedBatch(), false, /* writeOnly */
p.disableSeparatedIntents)
p.disableSeparatedIntents, overrideTxnDidNotUpdateMetaToFalse(context.TODO(), p.settings))
}

// NewReadOnly implements the Engine interface.
Expand All @@ -1275,7 +1280,8 @@ func (p *Pebble) NewReadOnly() ReadWriter {

// NewUnindexedBatch implements the Engine interface.
func (p *Pebble) NewUnindexedBatch(writeOnly bool) Batch {
return newPebbleBatch(p.db, p.db.NewBatch(), writeOnly, p.disableSeparatedIntents)
return newPebbleBatch(p.db, p.db.NewBatch(), writeOnly, p.disableSeparatedIntents,
overrideTxnDidNotUpdateMetaToFalse(context.TODO(), p.settings))
}

// NewSnapshot implements the Engine interface.
Expand Down Expand Up @@ -1806,6 +1812,10 @@ func (p *pebbleReadOnly) LogLogicalOp(op MVCCLogicalOpType, details MVCCLogicalO
panic("not implemented")
}

func (p *pebbleReadOnly) OverrideTxnDidNotUpdateMetaToFalse(ctx context.Context) bool {
panic("not implemented")
}

// pebbleSnapshot represents a snapshot created using Pebble.NewSnapshot().
type pebbleSnapshot struct {
snapshot *pebble.Snapshot
Expand Down Expand Up @@ -2097,3 +2107,11 @@ func pebbleExportToSst(

return rows.BulkOpSummary, MVCCKey{Key: resumeKey, Timestamp: resumeTS}, nil
}

// See the comment for Writer.OverrideTxnDidNotUpdateMetaToFalse.
func overrideTxnDidNotUpdateMetaToFalse(ctx context.Context, st *cluster.Settings) bool {
// The fix to the single delete bug in 21.2 has nothing to do with
// PebbleFormatVersioned, but both are part of the 21.2 beta, which will be
// the earliest production version of 21.2.
return !st.Version.ActiveVersionOrEmpty(ctx).IsActive(clusterversion.PebbleFormatVersioned)
}
Loading

0 comments on commit 21f4eed

Please sign in to comment.