Skip to content

Commit

Permalink
storage: make RaftTombstoneKey unreplicated
Browse files Browse the repository at this point in the history
RaftTombstoneKey was accidentally made a replicated key when it was first
introduced, a problem we first realized existed when it was [included in
snapshots].

At the time, we included workarounds to skip this key in various places
(snapshot application, consistency checker) but of course we have failed to
insert further hacks of the same kind elsewhere since (the one that prompting
this PR being the stats recomputation on splits, which I'm looking into as part
of cockroachdb#20181 -- unfortunately this commit doesn't seem to pertain to that problem)

It feels sloppy that we didn't follow through back then, but luckily the damage
appears to be limited; it is likely that the replicated existence of this key
results in MVCCStats SysBytes inconsistencies, but as it happens, these stats
are [already] [very] [inconsistent].

This commit does a few things:

- renames the old tombstone key to `RaftIncorrectLegacyTombstoneKey`
- introduces a (correctly unreplicated) `RaftTombstoneKey`
- introduces a migration. Once activated, only the new tombstone is written,
  but both tombstones are checked. Additionally, as the node restarts, all
  legacy tombstones are replaced by correct non-legacy ones.
- when applying a snapshot, forcibly delete any legacy tombstone contained
  within (even before the cluster version is bumped). This prevents new
  legacy tombstones to trickle on from other nodes.

`RaftIncorrectLegacyTombstoneKey` can be purged from the codebase in binaries
post v2.1, as at that point all peers have booted with a version that runs the
migration.

Thus, post v2.1, the replica consistency checker can stop skipping the legacy
tombstone key.

Fixes cockroachdb#12154.

Release note: None

[included in snapshots]: cockroachdb#12131
[already]: cockroachdb#20554
[very]: cockroachdb#20996
[inconsistent]: cockroachdb#21070
  • Loading branch information
tbg committed Jan 9, 2018
1 parent d41624a commit 3dc1d24
Show file tree
Hide file tree
Showing 15 changed files with 544 additions and 28 deletions.
24 changes: 19 additions & 5 deletions pkg/keys/keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,13 @@ func DecodeAbortSpanKey(key roachpb.Key, dest []byte) (uuid.UUID, error) {
return txnID, err
}

// RaftTombstoneKey returns a system-local key for a raft tombstone.
func RaftTombstoneKey(rangeID roachpb.RangeID) roachpb.Key {
return MakeRangeIDPrefixBuf(rangeID).RaftTombstoneKey()
// RaftTombstoneIncorrectLegacyKey returns a system-local key for a raft tombstone.
// This key was accidentally made replicated though it is not, requiring awkward
// workarounds. This method and all users can be removed in any binary version > 2.1.
//
// See https://github.com/cockroachdb/cockroach/issues/12154.
func RaftTombstoneIncorrectLegacyKey(rangeID roachpb.RangeID) roachpb.Key {
return MakeRangeIDPrefixBuf(rangeID).RaftTombstoneIncorrectLegacyKey()
}

// RaftAppliedIndexKey returns a system-local key for a raft applied index.
Expand Down Expand Up @@ -286,6 +290,11 @@ func makeRangeIDUnreplicatedKey(
return key
}

// RaftTombstoneKey returns a system-local key for a raft tombstone.
func RaftTombstoneKey(rangeID roachpb.RangeID) roachpb.Key {
return MakeRangeIDPrefixBuf(rangeID).RaftTombstoneKey()
}

// RaftHardStateKey returns a system-local key for a Raft HardState.
func RaftHardStateKey(rangeID roachpb.RangeID) roachpb.Key {
return MakeRangeIDPrefixBuf(rangeID).RaftHardStateKey()
Expand Down Expand Up @@ -802,8 +811,8 @@ func (b RangeIDPrefixBuf) AbortSpanKey(txnID uuid.UUID) roachpb.Key {
return encoding.EncodeBytesAscending(key, txnID.GetBytes())
}

// RaftTombstoneKey returns a system-local key for a raft tombstone.
func (b RangeIDPrefixBuf) RaftTombstoneKey() roachpb.Key {
// RaftTombstoneIncorrectLegacyKey returns a system-local key for a raft tombstone.
func (b RangeIDPrefixBuf) RaftTombstoneIncorrectLegacyKey() roachpb.Key {
return append(b.replicatedPrefix(), LocalRaftTombstoneSuffix...)
}

Expand Down Expand Up @@ -849,6 +858,11 @@ func (b RangeIDPrefixBuf) RangeTxnSpanGCThresholdKey() roachpb.Key {
return append(b.replicatedPrefix(), LocalTxnSpanGCThresholdSuffix...)
}

// RaftTombstoneKey returns a system-local key for a raft tombstone.
func (b RangeIDPrefixBuf) RaftTombstoneKey() roachpb.Key {
return append(b.unreplicatedPrefix(), LocalRaftTombstoneSuffix...)
}

// RaftHardStateKey returns a system-local key for a Raft HardState.
func (b RangeIDPrefixBuf) RaftHardStateKey() roachpb.Key {
return append(b.unreplicatedPrefix(), LocalRaftHardStateSuffix...)
Expand Down
3 changes: 2 additions & 1 deletion pkg/keys/printer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func TestPrettyPrint(t *testing.T) {
{StoreSuggestedCompactionKey(roachpb.Key("a"), MaxKey), `/Local/Store/suggestedCompaction/{"a"-/Max}`},

{AbortSpanKey(roachpb.RangeID(1000001), txnID), fmt.Sprintf(`/Local/RangeID/1000001/r/AbortSpan/%q`, txnID)},
{RaftTombstoneKey(roachpb.RangeID(1000001)), "/Local/RangeID/1000001/r/RaftTombstone"},
{RaftTombstoneIncorrectLegacyKey(roachpb.RangeID(1000001)), "/Local/RangeID/1000001/r/RaftTombstone"},
{RaftAppliedIndexKey(roachpb.RangeID(1000001)), "/Local/RangeID/1000001/r/RaftAppliedIndex"},
{LeaseAppliedIndexKey(roachpb.RangeID(1000001)), "/Local/RangeID/1000001/r/LeaseAppliedIndex"},
{RaftTruncatedStateKey(roachpb.RangeID(1000001)), "/Local/RangeID/1000001/r/RaftTruncatedState"},
Expand All @@ -64,6 +64,7 @@ func TestPrettyPrint(t *testing.T) {

{RaftHardStateKey(roachpb.RangeID(1000001)), "/Local/RangeID/1000001/u/RaftHardState"},
{RaftLastIndexKey(roachpb.RangeID(1000001)), "/Local/RangeID/1000001/u/RaftLastIndex"},
{RaftTombstoneKey(roachpb.RangeID(1000001)), "/Local/RangeID/1000001/u/RaftTombstone"},
{RaftLogKey(roachpb.RangeID(1000001), uint64(200001)), "/Local/RangeID/1000001/u/RaftLog/logIndex:200001"},
{RangeLastReplicaGCTimestampKey(roachpb.RangeID(1000001)), "/Local/RangeID/1000001/u/RangeLastReplicaGCTimestamp"},
{RangeLastVerificationTimestampKeyDeprecated(roachpb.RangeID(1000001)), "/Local/RangeID/1000001/u/RangeLastVerificationTimestamp"},
Expand Down
9 changes: 7 additions & 2 deletions pkg/server/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,8 @@ func (n *Node) start(
n.initDescriptor(addr, attrs, locality)

n.storeCfg.Settings.Version.OnChange(func(cv cluster.ClusterVersion) {
if err := n.stores.WriteClusterVersion(ctx, cv); err != nil {
log.Fatalf(ctx, "error updating persisted cluster version: %s", err)
if err := n.stores.OnClusterVersionChange(ctx, cv); err != nil {
log.Fatal(ctx, errors.Wrapf(err, "updating cluster version to %s", cv))
}
})

Expand Down Expand Up @@ -423,6 +423,11 @@ func (n *Node) start(
n.startedAt = n.storeCfg.Clock.Now().WallTime

n.startComputePeriodicMetrics(n.stopper, DefaultMetricsSampleInterval)
// Be careful about moving this line above `startStores`; store migrations rely
// on the fact that the cluster version has not been updated via Gossip (we
// have migrations that want to run only if the server starts with a given
// cluster version, but not if the server starts with a lower one and gets
// bumped immediately, which would be possible if gossip got started earlier).
n.startGossip(ctx, n.stopper)

log.Infof(ctx, "%s: started with %v engine(s) and attributes %v", n, bootstrappedEngines, attrs.Attrs)
Expand Down
66 changes: 62 additions & 4 deletions pkg/server/version_cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,14 @@ import (
gosql "database/sql"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/storage/engine"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
Expand Down Expand Up @@ -124,10 +127,51 @@ func TestClusterVersionUpgrade1_0To1_2(t *testing.T) {
tc := setupMixedCluster(t, bootstrapVersion, versions)
defer tc.Stopper().Stop(ctx)

tombstone := roachpb.RaftTombstone{NextReplicaID: 0}
tombstoneOp := func(f func(engine.Engine, roachpb.Key) error) error {
visitor := func(s *storage.Store) error {
// Put down a few fake legacy tombstones (#12154) to jog `migrateLegacyTombstones`, including
// one for the existing range, but also some for non-existing ones.

legacyTombstoneKeys := []roachpb.Key{
// Range 1 actually exists, at least on some stores.
keys.RaftTombstoneIncorrectLegacyKey(1 /* rangeID */),
// These ranges don't exist.
keys.RaftTombstoneIncorrectLegacyKey(200 /* rangeID */),
keys.RaftTombstoneIncorrectLegacyKey(300 /* rangeID */),
}
for _, legacyTombstoneKey := range legacyTombstoneKeys {
if err := f(s.Engine(), legacyTombstoneKey); err != nil {
return err
}
}
return nil
}
for i := 0; i < tc.NumServers(); i++ {
if err := tc.Server(i).GetStores().(*storage.Stores).VisitStores(visitor); err != nil {
return err
}
}
return nil
}

for i := 0; i < tc.NumServers(); i++ {
if exp, version := bootstrapVersion.MinimumVersion.String(), tc.getVersionFromShow(i); version != exp {
t.Fatalf("%d: incorrect version %s (wanted %s)", i, version, exp)
}

// Put some legacy tombstones down. We're going to test the migration for removing those in the
// negative: the tombstones are to be removed only after a node at *cluster version* v1.2 boots
// up. We don't restart nodes in this test, so the only boot is at the initial version v1.0,
// and we verify that the tombstones remain. The rewrite functionality is tested in
// TestStoreInitAndBootstrap.
if err := tombstoneOp(func(eng engine.Engine, legacyTombstoneKey roachpb.Key) error {
return engine.MVCCPutProto(
ctx, eng, nil /* ms */, legacyTombstoneKey, hlc.Timestamp{}, nil /* txn */, &tombstone,
)
}); err != nil {
t.Fatal(err)
}
}

for _, newVersion := range []roachpb.Version{
Expand Down Expand Up @@ -175,16 +219,30 @@ func TestClusterVersionUpgrade1_0To1_2(t *testing.T) {
vers := tc.getVersionFromSetting(i)
if v := vers.Version().MinimumVersion.String(); v == curVersion {
if isNoopUpdate {
// Just what we wanted!
return nil
continue
}
return errors.Errorf("%d: still waiting for %s (now at %s)", i, exp, v)
} else if v != exp {
t.Fatalf("%d: should never see version %s (wanted %s)", i, v, exp)
}
}
// Everyone is at the version we're waiting for.
return nil

// Everyone is at the version we're waiting for. Check that the tombstones are still there.
return tombstoneOp(func(eng engine.Engine, legacyTombstoneKey roachpb.Key) error {
ok, err := engine.MVCCGetProto(
ctx, eng, legacyTombstoneKey, hlc.Timestamp{}, true /* consistent */, nil, /* txn */
nil,
)
if err != nil {
return err
}
if !ok {
return errors.Errorf(
"legacy tombstone at %s unexpectedly removed", legacyTombstoneKey,
)
}
return nil
})
})

// Since the wrapped version setting exposes the new versions, it must
Expand Down
6 changes: 6 additions & 0 deletions pkg/settings/cluster/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ const (
VersionClearRange
VersionPartitioning
VersionLeaseSequence
VersionUnreplicatedTombstoneKey

// Add new versions here (step one of two).

Expand Down Expand Up @@ -140,6 +141,11 @@ var versionsSingleton = keyedVersions([]keyedVersion{
Key: VersionLeaseSequence,
Version: roachpb.Version{Major: 1, Minor: 1, Unstable: 8},
},
{
// VersionUnreplicatedTombstoneKey is https://github.com/cockroachdb/cockroach/pull/21120.
Key: VersionUnreplicatedTombstoneKey,
Version: roachpb.Version{Major: 1, Minor: 1, Unstable: 9},
},

// Add new versions here (step two of two).

Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/crdb_internal
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ select crdb_internal.set_vmodule('')
query T
select crdb_internal.node_executable_version()
----
1.1-8
1.1-9

query ITTT colnames
select node_id, component, field, regexp_replace(regexp_replace(value, '^\d+$', '<port>'), e':\\d+', ':<port>') as value from crdb_internal.node_runtime_info
Expand Down Expand Up @@ -270,4 +270,4 @@ select * from crdb_internal.gossip_liveness
query T
select crdb_internal.node_executable_version()
----
1.1-8
1.1-9
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ timeseries.storage.enabled true b if set,
trace.debug.enable false b if set, traces for recent requests can be seen in the /debug page
trace.lightstep.token · s if set, traces go to Lightstep using this token
trace.zipkin.collector · s if set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'); ignored if trace.lightstep.token is set.
version 1.1-8 m set the active cluster version in the format '<major>.<minor>'.
version 1.1-9 m set the active cluster version in the format '<major>.<minor>'.

query T colnames
SELECT * FROM [SHOW SESSION_USER]
Expand Down
84 changes: 84 additions & 0 deletions pkg/storage/migrations.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright 2017 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package storage

import (
"context"

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage/engine"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

// clearLegacyTombstone removes the legacy tombstone for the given rangeID, taking
// care to do this without reading from the engine (as is required by one of the
// callers).
func clearLegacyTombstone(eng engine.Writer, rangeID roachpb.RangeID) error {
return eng.Clear(engine.MakeMVCCMetadataKey(keys.RaftTombstoneIncorrectLegacyKey(rangeID)))
}

// migrateLegacyTombstones rewrites all legacy tombstones into the correct key.
// It can be removed in binaries post v2.1.
func migrateLegacyTombstones(ctx context.Context, eng engine.Engine) error {
var tombstone roachpb.RaftTombstone
handleTombstone := func(rangeID roachpb.RangeID) (more bool, _ error) {
batch := eng.NewBatch()
defer batch.Close()

tombstoneKey := keys.RaftTombstoneKey(rangeID)

{
// If there's already a new-style tombstone, pick the larger NextReplicaID
// (this will be the new-style tombstone's).
var exTombstone roachpb.RaftTombstone
ok, err := engine.MVCCGetProto(ctx, batch, tombstoneKey,
hlc.Timestamp{}, true, nil, &exTombstone)
if err != nil {
return false, err
}
if ok && exTombstone.NextReplicaID > tombstone.NextReplicaID {
tombstone.NextReplicaID = exTombstone.NextReplicaID
}
}

if err := engine.MVCCPutProto(ctx, batch, nil, tombstoneKey,
hlc.Timestamp{}, nil, &tombstone); err != nil {
return false, err
}
if err := clearLegacyTombstone(batch, rangeID); err != nil {
return false, err
}
// Specify sync==false because we don't want to sync individually,
// but see the end of the surrounding method where we sync explicitly.
err := batch.Commit(false /* sync */)
return err == nil, err
}
err := IterateIDPrefixKeys(ctx, eng, keys.RaftTombstoneIncorrectLegacyKey, &tombstone,
handleTombstone)
if err != nil {
return err
}

// Write a final bogus batch so that we get to do a sync commit, which
// implicitly also syncs everything written before.
batch := eng.NewBatch()
defer batch.Close()

if err := clearLegacyTombstone(batch, 1 /* rangeID */); err != nil {
return err
}
return batch.Commit(true /* sync */)
}
1 change: 1 addition & 0 deletions pkg/storage/rditer/replica_data_iter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ func createRangeData(
{keys.LeaseAppliedIndexKey(desc.RangeID), ts0},
{keys.RangeStatsKey(desc.RangeID), ts0},
{keys.RangeTxnSpanGCThresholdKey(desc.RangeID), ts0},
{keys.RaftTombstoneKey(desc.RangeID), ts0},
{keys.RaftHardStateKey(desc.RangeID), ts0},
{keys.RaftLastIndexKey(desc.RangeID), ts0},
{keys.RaftLogKey(desc.RangeID, 1), ts0},
Expand Down
4 changes: 4 additions & 0 deletions pkg/storage/replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -853,7 +853,11 @@ func (r *Replica) setTombstoneKey(
nextReplicaID := r.nextReplicaIDLocked(desc)
r.mu.minReplicaID = nextReplicaID
r.mu.Unlock()

tombstoneKey := keys.RaftTombstoneKey(desc.RangeID)
if !r.store.cfg.Settings.Version.IsMinSupported(cluster.VersionUnreplicatedTombstoneKey) {
tombstoneKey = keys.RaftTombstoneIncorrectLegacyKey(desc.RangeID)
}
tombstone := &roachpb.RaftTombstone{
NextReplicaID: nextReplicaID,
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/storage/replica_consistency.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ func (r *Replica) sha512(
snap engine.Reader,
snapshot *roachpb.RaftSnapshotData,
) (*replicaHash, error) {
tombstoneKey := engine.MakeMVCCMetadataKey(keys.RaftTombstoneKey(desc.RangeID))
legacyTombstoneKey := engine.MakeMVCCMetadataKey(keys.RaftTombstoneIncorrectLegacyKey(desc.RangeID))

// Iterate over all the data in the range.
iter := snap.NewIterator(false /* prefix */)
Expand All @@ -350,7 +350,7 @@ func (r *Replica) sha512(

var legacyTimestamp hlc.LegacyTimestamp
visitor := func(unsafeKey engine.MVCCKey, unsafeValue []byte) error {
if unsafeKey.Equal(tombstoneKey) {
if unsafeKey.Equal(legacyTombstoneKey) {
// Skip the tombstone key which is marked as replicated even though it
// isn't.
//
Expand Down
10 changes: 10 additions & 0 deletions pkg/storage/replica_raftstorage.go
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,16 @@ func (r *Replica) applySnapshot(
}
}

// Nodes before v2.0 may send an incorrect Raft tombstone (see #12154) that was supposed
// to be unreplicated. Simply remove it.
//
// NB: this can be removed in v2.1. This is because when we are running a binary at
// v2.1, we know that peers are at least running v2.0, which will never send out these
// snapshots.
if err := clearLegacyTombstone(batch, r.RangeID); err != nil {
return errors.Wrap(err, "while clearing legacy tombstone key")
}

// The log entries are all written to distinct keys so we can use a
// distinct batch.
distinctBatch := batch.Distinct()
Expand Down
Loading

0 comments on commit 3dc1d24

Please sign in to comment.