Skip to content

Commit

Permalink
Merge pull request cockroachdb#21070 from tschottdorf/fix-gcbytesage-…
Browse files Browse the repository at this point in the history
…stage2

engine: better randomized MVCCStats testing; fix bugs
  • Loading branch information
tbg authored Dec 29, 2017
2 parents 0e999d6 + 50d7474 commit eb5b878
Show file tree
Hide file tree
Showing 13 changed files with 862 additions and 158 deletions.
1 change: 0 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ kv.allocator.range_rebalance_threshold 5E-02 f minimum
kv.allocator.stat_based_rebalancing.enabled false b set to enable rebalancing of range replicas based on write load and disk usage
kv.allocator.stat_rebalance_threshold 2E-01 f minimum fraction away from the mean a store's stats (like disk usage or writes per second) can be before it is considered overfull or underfull
kv.bulk_io_write.max_rate 8.0 EiB z the rate limit (bytes/sec) to use for writes to disk on behalf of bulk io ops
kv.gc.batch_size 100000 i maximum number of keys in a batch for MVCC garbage collection
kv.raft.command.max_size 64 MiB z maximum size of a raft command
kv.raft_log.synchronize true b set to true to synchronize on Raft log writes to persistent storage
kv.range_descriptor_cache.size 1000000 i maximum number of entries in the range descriptor and leaseholder caches
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/metric_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ func TestQueryCounts(t *testing.T) {
accum := queryCounter{
selectCount: 2, // non-zero due to migrations
insertCount: 6, // non-zero due to migrations
deleteCount: 1, // non-zero due to migrations
ddlCount: s.MustGetSQLCounter(sql.MetaDdl.Name),
miscCount: s.MustGetSQLCounter(sql.MetaMisc.Name),
}
Expand Down
9 changes: 9 additions & 0 deletions pkg/sqlmigrations/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ var backwardCompatibleMigrations = []migrationDescriptor{
name: "upgrade table descs to interleaved format version",
workFn: upgradeTableDescsToInterleavedFormatVersion,
},
{
name: "remove cluster setting `kv.gc.batch_size`",
workFn: purgeClusterSettingKVGCBatchSize,
},
}

// migrationDescriptor describes a single migration hook that's used to modify
Expand Down Expand Up @@ -918,3 +922,8 @@ func upgradeTableDescsToInterleavedFormatVersion(ctx context.Context, r runner)
}
return nil
}

func purgeClusterSettingKVGCBatchSize(ctx context.Context, r runner) error {
// This cluster setting has been removed.
return runStmtAsRootWithRetry(ctx, r, `DELETE FROM SYSTEM.SETTINGS WHERE name='kv.gc.batch_size'`)
}
27 changes: 27 additions & 0 deletions pkg/sqlmigrations/migrations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,33 @@ func (mt *isolatedMigrationTest) close(ctx context.Context) {
backwardCompatibleMigrations = mt.oldMigrations
}

func TestRemoveClusterSettingKVGCBatchSize(t *testing.T) {
defer leaktest.AfterTest(t)()
ctx := context.Background()

mt := makeIsolatedMigrationTest(ctx, t, "remove cluster setting `kv.gc.batch_size`")
defer mt.close(ctx)

mt.start(t, base.TestServerArgs{})

mt.sqlDB.Exec(t, `INSERT INTO system.settings (name, "lastUpdated", "valueType", value) `+
`values('kv.gc.batch_size', NOW(), 'i', '10000')`)

if err := mt.runMigration(ctx); err != nil {
t.Fatal(err)
}
var n int
if err := mt.sqlDB.DB.QueryRow(
`SELECT COUNT(*) from system.settings WHERE name = 'kv.gc.batch_size'`,
).Scan(&n); err != nil {
t.Fatal(err)
}

if n != 0 {
t.Fatalf("migration did not clear out cluster setting, got %d rows", n)
}
}

func TestCreateSystemTable(t *testing.T) {
defer leaktest.AfterTest(t)()
ctx := context.Background()
Expand Down
17 changes: 8 additions & 9 deletions pkg/storage/batcheval/cmd_clear_range_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func TestCmdClearRangeBytesThreshold(t *testing.T) {
var stats enginepb.MVCCStats
for i := 0; i < test.keyCount; i++ {
key := roachpb.Key(fmt.Sprintf("%04d", i))
if err := engine.MVCCPut(ctx, eng, &stats, key, hlc.Timestamp{}, value, nil); err != nil {
if err := engine.MVCCPut(ctx, eng, &stats, key, hlc.Timestamp{WallTime: int64(i % 2)}, value, nil); err != nil {
t.Fatal(err)
}
}
Expand All @@ -115,21 +115,20 @@ func TestCmdClearRangeBytesThreshold(t *testing.T) {
EndKey: endKey,
},
}
var delta enginepb.MVCCStats
cArgs.Stats = &delta
cArgs.Stats = &enginepb.MVCCStats{}

if _, err := ClearRange(ctx, batch, cArgs, &roachpb.ClearRangeResponse{}); err != nil {
t.Fatal(err)
}

// Verify delta is equal to the stats we wrote.
// Verify cArgs.Stats is equal to the stats we wrote.
newStats := stats
newStats.SysBytes, newStats.SysCount = 0, 0 // ignore these values
delta.SysBytes, delta.SysCount = 0, 0 // these too, as GC threshold is updated
delta.AgeTo(newStats.LastUpdateNanos)
newStats.Add(delta)
newStats.SysBytes, newStats.SysCount = 0, 0 // ignore these values
cArgs.Stats.SysBytes, cArgs.Stats.SysCount = 0, 0 // these too, as GC threshold is updated
newStats.Add(*cArgs.Stats)
newStats.AgeTo(0) // pin at LastUpdateNanos==0
if !newStats.Equal(enginepb.MVCCStats{}) {
t.Errorf("expected stats on original writes to be negated on clear range: %+v vs %+v", stats, delta)
t.Errorf("expected stats on original writes to be negated on clear range: %+v vs %+v", stats, *cArgs.Stats)
}

// Verify we see the correct counts for Clear and ClearRange.
Expand Down
7 changes: 0 additions & 7 deletions pkg/storage/batcheval/cmd_gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/storage/batcheval/result"
"github.com/cockroachdb/cockroach/pkg/storage/engine"
"github.com/cockroachdb/cockroach/pkg/storage/spanset"
Expand All @@ -31,11 +30,6 @@ func init() {
RegisterCommand(roachpb.GC, declareKeysGC, GC)
}

var gcBatchSize = settings.RegisterIntSetting("kv.gc.batch_size",
"maximum number of keys in a batch for MVCC garbage collection",
100000,
)

func declareKeysGC(
desc roachpb.RangeDescriptor, header roachpb.Header, req roachpb.Request, spans *spanset.SpanSet,
) {
Expand Down Expand Up @@ -90,7 +84,6 @@ func GC(
// Garbage collect the specified keys by expiration timestamps.
if err := engine.MVCCGarbageCollect(
ctx, batch, cArgs.Stats, keys, h.Timestamp,
gcBatchSize.Get(&cArgs.EvalCtx.ClusterSettings().SV),
); err != nil {
return result.Result{}, err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/storage/engine/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,7 @@ func BenchmarkMVCCGarbageCollect(b *testing.B) {

b.StartTimer()
if err := MVCCGarbageCollect(
ctx, engine, &enginepb.MVCCStats{}, gcKeys, now, math.MaxInt64,
ctx, engine, &enginepb.MVCCStats{}, gcKeys, now,
); err != nil {
b.Fatal(err)
}
Expand Down
24 changes: 15 additions & 9 deletions pkg/storage/engine/enginepb/mvcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,21 @@ func (ms MVCCStats) GCByteAge(nowNanos int64) int64 {
return ms.GCBytesAge
}

// Forward is like AgeTo, but if nowNanos is not ahead of ms.LastUpdateNanos,
// this method is a noop.
func (ms *MVCCStats) Forward(nowNanos int64) {
if ms.LastUpdateNanos >= nowNanos {
return
}
ms.AgeTo(nowNanos)
}

// AgeTo encapsulates the complexity of computing the increment in age
// quantities contained in MVCCStats. Two MVCCStats structs only add and
// subtract meaningfully if their LastUpdateNanos matches, so aging them to
// the max of their LastUpdateNanos is a prerequisite.
// If nowNanos is behind ms.LastUpdateNanos, this method is a noop.
// the max of their LastUpdateNanos is a prerequisite, though Add() takes
// care of this internally.
func (ms *MVCCStats) AgeTo(nowNanos int64) {
if ms.LastUpdateNanos >= nowNanos {
return
}
// Seconds are counted every time each individual nanosecond timestamp
// crosses a whole second boundary (i.e. is zero mod 1E9). Thus it would
// be a mistake to use the (nonequivalent) expression (a-b)/1E9.
Expand All @@ -76,8 +82,8 @@ func (ms *MVCCStats) AgeTo(nowNanos int64) {
func (ms *MVCCStats) Add(oms MVCCStats) {
// Enforce the max LastUpdateNanos for both ages based on their
// pre-addition state.
ms.AgeTo(oms.LastUpdateNanos)
oms.AgeTo(ms.LastUpdateNanos)
ms.Forward(oms.LastUpdateNanos)
oms.Forward(ms.LastUpdateNanos) // on local copy
// If either stats object contains estimates, their sum does too.
ms.ContainsEstimates = ms.ContainsEstimates || oms.ContainsEstimates
// Now that we've done that, we may just add them.
Expand All @@ -100,8 +106,8 @@ func (ms *MVCCStats) Add(oms MVCCStats) {
func (ms *MVCCStats) Subtract(oms MVCCStats) {
// Enforce the max LastUpdateNanos for both ages based on their
// pre-subtraction state.
ms.AgeTo(oms.LastUpdateNanos)
oms.AgeTo(ms.LastUpdateNanos)
ms.Forward(oms.LastUpdateNanos)
oms.Forward(ms.LastUpdateNanos)
// If either stats object contains estimates, their difference does too.
ms.ContainsEstimates = ms.ContainsEstimates || oms.ContainsEstimates
// Now that we've done that, we may subtract.
Expand Down
79 changes: 57 additions & 22 deletions pkg/storage/engine/mvcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,13 @@ func updateStatsOnPut(
// Remove current live counts for this key.
if orig != nil {
if sys {
ms.SysBytes -= (origMetaKeySize + origMetaValSize)
ms.SysBytes -= origMetaKeySize + origMetaValSize
if orig.Txn != nil {
// If the original value was an intent, we're replacing the
// intent. Note that since it's a system key, it doesn't affect
// IntentByte, IntentCount, and correspondingly, IntentAge.
ms.SysBytes -= orig.KeyBytes + orig.ValBytes
}
ms.SysCount--
} else {
// Move the (so far empty) stats to the timestamp at which the
Expand Down Expand Up @@ -267,8 +273,18 @@ func updateStatsOnPut(
}

// Move the stats to the new meta's timestamp. If we had an orig meta, this
// ages those original stats by the time which the previous version was live.
// ages those original stats by the time which the previous version was
// live.
//
// Note that there is an interesting special case here: it's possible that
// meta.Timestamp.WallTime < orig.Timestamp.WallTime. This wouldn't happen
// outside of tests (due to our semantics of txn.OrigTimestamp, which never
// decreases) but it sure does happen in randomized testing. An earlier
// version of the code used `Forward` here, which is incorrect as it would be
// a no-op and fail to subtract out the intent bytes/GC age incurred due to
// removing the meta entry at `orig.Timestamp` (when `orig != nil`).
ms.AgeTo(meta.Timestamp.WallTime)

if sys {
ms.SysBytes += meta.KeyBytes + meta.ValBytes + metaKeySize + metaValSize
ms.SysCount++
Expand Down Expand Up @@ -321,28 +337,49 @@ func updateStatsOnResolve(
ms.AgeTo(orig.Timestamp.WallTime)
sys := isSysLocal(key)

if orig.Deleted != meta.Deleted {
log.Fatalf(context.TODO(), "on resolve, original meta was deleted=%t, but new one is deleted=%t",
orig.Deleted, meta.Deleted)
}

if sys {
ms.SysBytes += metaKeySize + metaValSize - origMetaValSize - origMetaKeySize
ms.AgeTo(meta.Timestamp.WallTime)
ms.SysBytes += (metaKeySize + metaValSize) - (origMetaValSize + origMetaKeySize)
} else {
if !meta.Deleted {
ms.LiveBytes += metaKeySize + metaValSize - origMetaValSize - origMetaKeySize
}
// At orig.Timestamp, the original meta key disappears.
ms.KeyBytes -= origMetaKeySize + orig.KeyBytes
ms.ValBytes -= origMetaValSize + orig.ValBytes

// If committing, subtract out intent counts.
if commit {
ms.IntentBytes -= (meta.KeyBytes + meta.ValBytes)
ms.IntentCount--
ms.IntentBytes -= orig.KeyBytes + orig.ValBytes
ms.IntentCount--

// If the old intent is a deletion, then the key already isn't tracked
// in LiveBytes any more (and the new intent/value is also a deletion).
// If we're looking at a non-deletion intent/value, update the live
// bytes to account for the difference between the previous intent and
// the new intent/value.
if !meta.Deleted {
ms.LiveBytes -= (origMetaKeySize + origMetaValSize) + (orig.KeyBytes + meta.ValBytes)
}

ms.AgeTo(meta.Timestamp.WallTime)

// At meta.Timestamp, the new meta key appears.
ms.KeyBytes += metaKeySize + meta.KeyBytes
ms.ValBytes += metaValSize + meta.ValBytes

if !commit {
// If not committing, the intent reappears (but at meta.Timestamp).
//
// This is the case in which an intent is pushed (a similar case
// happens when an intent is overwritten, but that's handled in
// updateStatsOnPut, not this method).
ms.IntentBytes += meta.KeyBytes + meta.ValBytes
ms.IntentCount++
}

if !meta.Deleted {
ms.LiveBytes += (metaKeySize + metaValSize) + (meta.KeyBytes + meta.ValBytes)
}
}
return ms
}
Expand Down Expand Up @@ -2198,15 +2235,12 @@ func MVCCResolveWriteIntentRangeUsingIter(
// keys slice. The engine iterator is seeked in turn to each listed
// key, clearing all values with timestamps <= to expiration. The
// timestamp parameter is used to compute the intent age on GC.
// Garbage collection stops after clearing maxClears values
// (to limit the size of the WriteBatch produced).
func MVCCGarbageCollect(
ctx context.Context,
engine ReadWriter,
ms *enginepb.MVCCStats,
keys []roachpb.GCRequest_GCKey,
timestamp hlc.Timestamp,
maxClears int64,
) error {
// We're allowed to use a prefix iterator because we always Seek() the
// iterator when handling a new user key.
Expand All @@ -2215,7 +2249,7 @@ func MVCCGarbageCollect(

var count int64
defer func(begin time.Time) {
log.Eventf(ctx, "done with GC evaluation for %d keys at %.2f keys/sec. Deleted %d versions",
log.Eventf(ctx, "done with GC evaluation for %d keys at %.2f keys/sec. Deleted %d entries",
len(keys), float64(len(keys))*1E9/float64(timeutil.Since(begin)), count)
}(timeutil.Now())

Expand All @@ -2233,6 +2267,13 @@ func MVCCGarbageCollect(
inlinedValue := meta.IsInline()
implicitMeta := iter.UnsafeKey().IsValue()
// First, check whether all values of the key are being deleted.
//
// Note that we naively can't terminate GC'ing keys loop early if we
// enter this branch, as it will update the stats under the provision
// that the (implicit or explicit) meta key (and thus all versions) are
// being removed. We had this faulty functionality at some point; it
// should no longer be necessary since the higher levels already make
// sure each individual GCRequest does bounded work.
if !gcKey.Timestamp.Less(hlc.Timestamp(meta.Timestamp)) {
// For version keys, don't allow GC'ing the meta key if it's
// not marked deleted. However, for inline values we allow it;
Expand All @@ -2258,9 +2299,6 @@ func MVCCGarbageCollect(
return err
}
count++
if count >= maxClears {
return nil
}
}
}

Expand Down Expand Up @@ -2289,13 +2327,10 @@ func MVCCGarbageCollect(
int64(len(iter.UnsafeValue())), nil, unsafeIterKey.Timestamp.WallTime,
timestamp.WallTime))
}
count++
if err := engine.Clear(unsafeIterKey); err != nil {
return err
}
count++
if count >= maxClears {
return nil
}
}
}
}
Expand Down
Loading

0 comments on commit eb5b878

Please sign in to comment.