diff --git a/compaction.go b/compaction.go index 703184dc82..679aa7fea1 100644 --- a/compaction.go +++ b/compaction.go @@ -66,6 +66,27 @@ type compactionLevel struct { files manifest.LevelSlice } +// Return output from compactionOutputSplitters. See comment on +// compactionOutputSplitter.shouldSplitBefore() on how this value is used. +type compactionSplitSuggestion int + +const( + noSplit compactionSplitSuggestion = iota + splitSoon + splitNow +) + +// String implements the Stringer interface. +func (c compactionSplitSuggestion) String() string { + switch c { + case noSplit: + return "no-split" + case splitSoon: + return "split-soon" + } + return "split-now" +} + // compactionOutputSplitter is an interface for encapsulating logic around // switching the output of a compaction to a new output file. Additional // constraints around switching compaction outputs that are specific to that @@ -73,8 +94,13 @@ type compactionLevel struct { // compactionOutputSplitters that compose other child compactionOutputSplitters. type compactionOutputSplitter interface { // shouldSplitBefore returns whether we should split outputs before the - // specified "current key". - shouldSplitBefore(key *InternalKey, tw *sstable.Writer) bool + // specified "current key". The return value is one of splitNow, splitSoon, + // or noSlit. splitNow means a split is advised before the specified key, + // splitSoon means no split is advised yet but the limit returned in + // onNewOutput can be considered invalidated and a splitNow suggestion will + // be made on an upcoming key shortly, and noSplit means no split is + // advised. + shouldSplitBefore(key *InternalKey, tw *sstable.Writer) compactionSplitSuggestion // onNewOutput updates internal splitter state when the compaction switches // to a new sstable, and returns the next limit for the new output which // would get used to truncate range tombstones if the compaction iterator @@ -93,12 +119,15 @@ type fileSizeSplitter struct { maxFileSize uint64 } -func (f *fileSizeSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) bool { +func (f *fileSizeSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) compactionSplitSuggestion { // The Kind != RangeDelete part exists because EstimatedSize doesn't grow // rightaway when a range tombstone is added to the fragmenter. It's always // better to make a sequence of range tombstones visible to the fragmenter. - return key.Kind() != InternalKeyKindRangeDelete && tw != nil && - tw.EstimatedSize() >= f.maxFileSize + if key.Kind() != InternalKeyKindRangeDelete && tw != nil && + tw.EstimatedSize() >= f.maxFileSize { + return splitNow + } + return noSplit } func (f *fileSizeSplitter) onNewOutput(key *InternalKey) []byte { @@ -111,8 +140,11 @@ type grandparentLimitSplitter struct { limit []byte } -func (g *grandparentLimitSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) bool { - return g.limit != nil && g.c.cmp(key.UserKey, g.limit) > 0 +func (g *grandparentLimitSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) compactionSplitSuggestion { + if g.limit != nil && g.c.cmp(key.UserKey, g.limit) > 0 { + return splitNow + } + return noSplit } func (g *grandparentLimitSplitter) onNewOutput(key *InternalKey) []byte { @@ -203,8 +235,11 @@ type l0LimitSplitter struct { limit []byte } -func (l *l0LimitSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) bool { - return l.limit != nil && l.c.cmp(key.UserKey, l.limit) > 0 +func (l *l0LimitSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) compactionSplitSuggestion { + if l.limit != nil && l.c.cmp(key.UserKey, l.limit) > 0 { + return splitNow + } + return noSplit } func (l *l0LimitSplitter) onNewOutput(key *InternalKey) []byte { @@ -244,13 +279,17 @@ type splitterGroup struct { splitters []compactionOutputSplitter } -func (a *splitterGroup) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) bool { +func (a *splitterGroup) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) (suggestion compactionSplitSuggestion) { + suggestion = noSplit for _, splitter := range a.splitters { - if splitter.shouldSplitBefore(key, tw) { - return true + switch splitter.shouldSplitBefore(key, tw) { + case splitNow: + return splitNow + case splitSoon: + suggestion = splitSoon } } - return false + return suggestion } func (a *splitterGroup) onNewOutput(key *InternalKey) []byte { @@ -281,17 +320,18 @@ type userKeyChangeSplitter struct { splitter compactionOutputSplitter } -func (u *userKeyChangeSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) bool { +func (u *userKeyChangeSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) compactionSplitSuggestion { if u.splitOnNextUserKey && u.cmp(u.savedKey, key.UserKey) != 0 { u.splitOnNextUserKey = false u.savedKey = u.savedKey[:0] - return true + return splitNow } - if u.splitter.shouldSplitBefore(key, tw) { + if split := u.splitter.shouldSplitBefore(key, tw); split == splitNow { u.splitOnNextUserKey = true u.savedKey = append(u.savedKey[:0], key.UserKey...) + return noSplit } - return false + return noSplit } func (u *userKeyChangeSplitter) onNewOutput(key *InternalKey) []byte { @@ -309,7 +349,7 @@ type nonZeroSeqNumSplitter struct { splitOnNonZeroSeqNum bool } -func (n *nonZeroSeqNumSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) bool { +func (n *nonZeroSeqNumSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) compactionSplitSuggestion { curSeqNum := key.SeqNum() keyKind := key.Kind() prevPointSeqNum := n.prevPointSeqNum @@ -320,16 +360,17 @@ func (n *nonZeroSeqNumSplitter) shouldSplitBefore(key *InternalKey, tw *sstable. if n.splitOnNonZeroSeqNum { if prevPointSeqNum > 0 || n.c.rangeDelFrag.Empty() { n.splitOnNonZeroSeqNum = false - return true + return splitNow } - } else if n.splitter.shouldSplitBefore(key, tw) { + } else if split := n.splitter.shouldSplitBefore(key, tw); split == splitNow { userKeyChange := curSeqNum > prevPointSeqNum if prevPointSeqNum > 0 || n.c.rangeDelFrag.Empty() || userKeyChange { - return true + return splitNow } n.splitOnNonZeroSeqNum = true + return splitSoon } - return false + return noSplit } func (n *nonZeroSeqNumSplitter) onNewOutput(key *InternalKey) []byte { @@ -2146,18 +2187,31 @@ func (d *DB) runCompaction( // Each inner loop iteration processes one key from the input iterator. prevPointSeqNum := InternalKeySeqNumMax for ; key != nil; key, val = iter.Next() { - if splitter.shouldSplitBefore(key, tw) { - limit = key.UserKey - if splittingFlush { - // Flush all tombstones up until key.UserKey, and - // truncate them at that key. - // - // The fragmenter could save the passed-in key. As this - // key could live beyond the write into the current - // sstable output file, make a copy. - c.rangeDelFrag.TruncateAndFlushTo(key.Clone().UserKey) + if split := splitter.shouldSplitBefore(key, tw); split != noSplit { + if split == splitNow { + limit = key.UserKey + if splittingFlush { + // Flush all tombstones up until key.UserKey, and + // truncate them at that key. + // + // The fragmenter could save the passed-in key. As this + // key could live beyond the write into the current + // sstable output file, make a copy. + c.rangeDelFrag.TruncateAndFlushTo(key.Clone().UserKey) + } + break } - break + // split == splitSoon + // + // Invalidate the limit here. It has probably been exceeded + // by the current key, but we can't split just yet, such as to + // maintain the nonzero sequence number invariant mentioned + // above. Setting limit to nil is okay as it's just a transient + // setting, as when split eventually equals splitNow, we will + // set the limit to the key after that. If the compaction were + // to run out of keys before we get to that point, limit would + // be nil as it should be for all end-of-compaction cases. + limit = nil } atomic.StoreUint64(c.atomicBytesIterated, c.bytesIterated) diff --git a/compaction_test.go b/compaction_test.go index 9ec7c84194..a249c46843 100644 --- a/compaction_test.go +++ b/compaction_test.go @@ -1959,10 +1959,10 @@ func TestCompactionCheckOrdering(t *testing.T) { } type mockSplitter struct { - guaranteesUserKeyChangeVal, shouldSplitVal bool + shouldSplitVal compactionSplitSuggestion } -func (m *mockSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) bool { +func (m *mockSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) compactionSplitSuggestion { return m.shouldSplitVal } @@ -2037,23 +2037,28 @@ func TestCompactionOutputSplitters(t *testing.T) { return "expected at least 2 args" } splitterToSet := (*pickSplitter(d.CmdArgs[0].Key)).(*mockSplitter) - val, err := strconv.ParseBool(d.CmdArgs[1].Key) - if err != nil { - t.Fatal(err) + var val compactionSplitSuggestion + switch d.CmdArgs[1].Key { + case "split-now": + val = splitNow + case "split-soon": + val = splitSoon + case "no-split": + val = noSplit + default: + t.Fatalf("unexpected value for should-split: %s", d.CmdArgs[1].Key) } splitterToSet.shouldSplitVal = val - case "set-user-key-change": - return "TODO: remove" case "should-split-before": if len(d.CmdArgs) < 1 { return "expected at least 1 arg" } key := base.ParseInternalKey(d.CmdArgs[0].Key) shouldSplit := main.shouldSplitBefore(&key, nil) - if shouldSplit { + if shouldSplit == splitNow { main.onNewOutput(&key) } - return fmt.Sprintf("%t", shouldSplit) + return fmt.Sprintf("%s", shouldSplit) default: return fmt.Sprintf("unknown command: %s", d.Cmd) } diff --git a/testdata/compaction_output_splitters b/testdata/compaction_output_splitters index 050429b3c8..6361d84a2b 100644 --- a/testdata/compaction_output_splitters +++ b/testdata/compaction_output_splitters @@ -17,41 +17,41 @@ init main array ---- ok -set-should-split child0 false +set-should-split child0 no-split ---- ok -set-should-split child1 false +set-should-split child1 no-split ---- ok should-split-before foo.SET.2 ---- -false +no-split -set-should-split child1 true +set-should-split child1 split-now ---- ok should-split-before foo.SET.2 ---- -true +split-now -set-should-split child1 false +set-should-split child1 no-split ---- ok should-split-before foo.SET.2 ---- -false +no-split -set-should-split child0 true +set-should-split child0 split-now ---- ok should-split-before foo.SET.2 ---- -true +split-now # userKeyChangeSplitter tests @@ -69,51 +69,51 @@ ok should-split-before foo.SET.6 ---- -false +no-split should-split-before foo.SET.5 ---- -false +no-split -set-should-split child0 true +set-should-split child0 split-now ---- ok should-split-before foo.SET.4 ---- -false +no-split should-split-before foo.SET.3 ---- -false +no-split should-split-before food.SET.6 ---- -true +split-now -set-should-split child0 false +set-should-split child0 no-split ---- ok should-split-before food.SET.5 ---- -false +no-split -set-should-split child0 true +set-should-split child0 split-now ---- ok should-split-before food.SET.4 ---- -false +no-split -set-should-split child0 false +set-should-split child0 no-split ---- ok should-split-before food2.SET.4 ---- -true +split-now # nonZeroSeqNumSplitter tests @@ -129,76 +129,76 @@ init main nonzeroseqnum tombstone ---- ok -set-should-split child0 false +set-should-split child0 no-split ---- ok should-split-before foo.SET.5 ---- -false +no-split should-split-before foo.RANGEDEL.0 ---- -false +no-split -set-should-split child0 true +set-should-split child0 split-now ---- ok -# This should be true, as the last point key is foo.SET.5. +# This should be split-now, as the last point key is foo.SET.5. should-split-before foo.SET.0 ---- -true +split-now -set-should-split child0 false +set-should-split child0 no-split ---- ok should-split-before food.SET.0 ---- -false +no-split -set-should-split child0 true +set-should-split child0 split-now ---- ok should-split-before food1.SET.0 ---- -false +split-soon -# Even though we've set should-split-before to false for the child splitter, +# Even though we've set should-split-before to no-split for the child splitter, # nonZeroSeqNumSplitter "remembers" it and splits at the next good split point. -set-should-split child0 false +set-should-split child0 no-split ---- ok should-split-before food2.SET.0 ---- -false +no-split should-split-before food3.SET.4 ---- -false +no-split -# This one should be true, as the previous point seqnum is nonzero. +# This one should be split-now, as the previous point seqnum is nonzero. should-split-before food4.SET.2 ---- -true +split-now should-split-before food4.SET.0 ---- -false +no-split -set-should-split child0 true +set-should-split child0 split-now ---- ok should-split-before food5.SET.3 ---- -true +split-now reset ---- @@ -219,18 +219,18 @@ init main nonzeroseqnum ---- ok -set-should-split child0 false +set-should-split child0 no-split ---- ok should-split-before food.SET.0 ---- -false +no-split -set-should-split child0 true +set-should-split child0 split-now ---- ok should-split-before food1.SET.0 ---- -true +split-now diff --git a/testdata/manual_compaction b/testdata/manual_compaction index 9e3512e384..f383a9e7ff 100644 --- a/testdata/manual_compaction +++ b/testdata/manual_compaction @@ -772,3 +772,47 @@ compact a-r L1 000007:[q#8,RANGEDEL-r#72057594037927935,RANGEDEL] 3: 000006:[q#6,SET-q#6,SET] + + +# Test a case where the output ends one key after a grandparent limit is +# crossed, but we couldn't have split outputs at that key due to the point +# key before that having a zero seqnum. +define target-file-sizes=(100, 100, 100) +L1 + a.RANGEDEL.10:b + b.SET.0:foo + d.RANGEDEL.0:e + j.SET.10:foo +L2 + f.RANGEDEL.7:g +L3 + c.SET.6:6 +L3 + c.SET.5:5 +L3 + c.SET.4:4 +L4 + a.SET.0:0 + f.SET.0:0 +---- +1: + 000004:[a-j] +2: + 000005:[f-g] +3: + 000006:[c-c] + 000007:[c-c] + 000008:[c-c] +4: + 000009:[a-f] + +compact a-r L1 +---- +2: + 000010:[a#10,RANGEDEL-j#10,SET] +3: + 000006:[c#6,SET-c#6,SET] + 000007:[c#5,SET-c#5,SET] + 000008:[c#4,SET-c#4,SET] +4: + 000009:[a#0,SET-f#0,SET]