diff --git a/compaction.go b/compaction.go index 8382c599f7..530e160b88 100644 --- a/compaction.go +++ b/compaction.go @@ -1905,15 +1905,27 @@ func (d *DB) runIngestFlush(c *compaction) (*manifest.VersionEdit, error) { ve := &versionEdit{} var level int var err error + var fileToSplit *fileMetadata + var ingestSplitFiles []ingestSplitFile for _, file := range c.flushing[0].flushable.(*ingestedFlushable).files { - level, err = ingestTargetLevel( + suggestSplit := d.opts.Experimental.IngestSplit != nil && d.opts.Experimental.IngestSplit() && + d.FormatMajorVersion() >= FormatVirtualSSTables + level, fileToSplit, err = ingestTargetLevel( d.newIters, d.tableNewRangeKeyIter, iterOpts, d.cmp, c.version, baseLevel, d.mu.compact.inProgress, file.FileMetadata, + suggestSplit, ) if err != nil { return nil, err } ve.NewFiles = append(ve.NewFiles, newFileEntry{Level: level, Meta: file.FileMetadata}) + if fileToSplit != nil { + ingestSplitFiles = append(ingestSplitFiles, ingestSplitFile{ + ingestFile: file.FileMetadata, + splitFile: fileToSplit, + level: level, + }) + } levelMetrics := c.metrics[level] if levelMetrics == nil { levelMetrics = &LevelMetrics{} @@ -1923,6 +1935,28 @@ func (d *DB) runIngestFlush(c *compaction) (*manifest.VersionEdit, error) { levelMetrics.TablesIngested++ } + updateLevelMetricsOnExcise := func(m *fileMetadata, level int, added []newFileEntry) { + levelMetrics := c.metrics[level] + if levelMetrics == nil { + levelMetrics = &LevelMetrics{} + c.metrics[level] = levelMetrics + } + levelMetrics.NumFiles-- + levelMetrics.Size -= int64(m.Size) + for i := range added { + levelMetrics.NumFiles++ + levelMetrics.Size += int64(added[i].Meta.Size) + } + } + + if len(ingestSplitFiles) > 0 { + ve.DeletedFiles = make(map[manifest.DeletedFileEntry]*manifest.FileMetadata) + replacedFiles := make(map[base.FileNum][]newFileEntry) + if err := d.ingestSplit(ve, updateLevelMetricsOnExcise, ingestSplitFiles, replacedFiles); err != nil { + return nil, err + } + } + return ve, nil } @@ -2093,6 +2127,24 @@ func (d *DB) flush1() (bytesFlushed uint64, err error) { metrics.BytesIn += d.mu.mem.queue[i].logSize } } + } else if len(ve.DeletedFiles) > 0 { + // c.kind == compactionKindIngestedFlushable && we have deleted files due + // to ingest-time splits. + // + // Iterate through all other compactions, and check if their inputs have + // been replaced due to an ingest-time split. In that case, cancel the + // compaction. 
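+ //
+ // A cancelled compaction is expected to observe c.cancel and abort
+ // before logging its version edit; letting it run to completion could
+ // produce an edit that references input files no longer present in the
+ // latest version.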
+ for c2 := range d.mu.compact.inProgress { + for i := range c2.inputs { + iter := c2.inputs[i].files.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if _, ok := ve.DeletedFiles[deletedFileEntry{FileNum: f.FileNum, Level: c2.inputs[i].level}]; ok { + c2.cancel.Store(true) + break + } + } + } + } } err = d.mu.versions.logAndApply(jobID, ve, c.metrics, false, /* forceRotation */ func() []compactionInfo { return d.getInProgressCompactionInfoLocked(c) }) diff --git a/data_test.go b/data_test.go index e3a1e13dc6..3b60c0e635 100644 --- a/data_test.go +++ b/data_test.go @@ -1356,8 +1356,9 @@ func runForceIngestCmd(td *datadriven.TestData, d *DB) error { int, map[*compaction]struct{}, *fileMetadata, - ) (int, error) { - return level, nil + bool, + ) (int, *fileMetadata, error) { + return level, nil, nil }, nil /* shared */, KeyRange{}, nil /* external */) return err } diff --git a/ingest.go b/ingest.go index 11e3d759ef..94e4103d5c 100644 --- a/ingest.go +++ b/ingest.go @@ -797,6 +797,10 @@ func overlapWithIterator( return computeOverlapWithSpans(*rangeDelIter) } +// ingestTargetLevel returns the target level for a file being ingested. +// If suggestSplit is true, it accounts for ingest-time splitting as part of +// its target level calculation, and if a split candidate is found, that file +// is returned as the splitFile. func ingestTargetLevel( newIters tableNewIters, newRangeKeyIter keyspan.TableNewSpanIter, @@ -806,7 +810,8 @@ func ingestTargetLevel( baseLevel int, compactions map[*compaction]struct{}, meta *fileMetadata, -) (int, error) { + suggestSplit bool, +) (targetLevel int, splitFile *fileMetadata, err error) { // Find the lowest level which does not have any files which overlap meta. We // search from L0 to L6 looking for whether there are any files in the level // which overlap meta. We want the "lowest" level (where lower means @@ -821,6 +826,14 @@ func ingestTargetLevel( // violate the sequence number invariant. // - no file boundary overlap with level i, since that will violate the // invariant that files do not overlap in levels i > 0. + // - if there is only a file overlap at a given level, and no data overlap, + // we can still slot a file at that level. We return the fileMetadata with + // which we have file boundary overlap (must be only one file, as sstable + // bounds are usually tight on user keys) and the caller is expected to split + // that sstable into two virtual sstables, allowing this file to go into that + // level. Note that if we have file boundary overlap with two files, which + // should only happen on rare occasions, we treat it as data overlap and + // don't use this optimization. // // The file boundary overlap check is simpler to conceptualize. Consider the // following example, in which the ingested file lies completely before or @@ -865,12 +878,10 @@ func ingestTargetLevel( // existing point that falls within the ingested table bounds as being "data // overlap". - targetLevel := 0 - // This assertion implicitly checks that we have the current version of // the metadata. if v.L0Sublevels == nil { - return 0, errors.AssertionFailedf("could not read L0 sublevels") + return 0, nil, errors.AssertionFailedf("could not read L0 sublevels") } // Check for overlap over the keys of L0 by iterating over the sublevels. for subLevel := 0; subLevel < len(v.L0SublevelFiles); subLevel++ { @@ -896,10 +907,10 @@ func ingestTargetLevel( err := iter.Close() // Closes range del iter as well. 
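+ // firstError preserves the first non-nil error from the two Close
+ // calls; the overlap result below is only trusted if both iterators
+ // closed cleanly.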
err = firstError(err, levelIter.Close()) if err != nil { - return 0, err + return 0, nil, err } if overlap { - return targetLevel, nil + return targetLevel, nil, nil } } @@ -926,26 +937,47 @@ func ingestTargetLevel( err := levelIter.Close() // Closes range del iter as well. err = firstError(err, rkeyLevelIter.Close()) if err != nil { - return 0, err + return 0, nil, err } if overlap { - return targetLevel, nil + return targetLevel, splitFile, nil } // Check boundary overlap. + var candidateSplitFile *fileMetadata boundaryOverlaps := v.Overlaps(level, cmp, meta.Smallest.UserKey, meta.Largest.UserKey, meta.Largest.IsExclusiveSentinel()) if !boundaryOverlaps.Empty() { - continue + // We are already guaranteed to not have any data overlaps with files + // in boundaryOverlaps, otherwise we'd have returned in the above if + // statements. Use this, plus boundaryOverlaps.Len() == 1 to detect for + // the case where we can slot this file into the current level despite + // a boundary overlap, by splitting one existing file into two virtual + // sstables. + if suggestSplit && boundaryOverlaps.Len() == 1 { + iter := boundaryOverlaps.Iter() + candidateSplitFile = iter.First() + } else { + // We either don't want to suggest ingest-time splits (i.e. + // !suggestSplit), or we boundary-overlapped with more than one file. + continue + } } - // Check boundary overlap with any ongoing compactions. + // Check boundary overlap with any ongoing compactions. We consider an + // overlapping compaction that's writing files to an output level as + // equivalent to boundary overlap with files in that output level. + // + // We cannot check for data overlap with the new SSTs compaction will produce + // since compaction hasn't been done yet. However, there's no need to check + // since all keys in them will be from levels in [c.startLevel, + // c.outputLevel], and all those levels have already had their data overlap + // tested negative (else we'd have returned earlier). // - // We cannot check for data overlap with the new SSTs compaction will - // produce since compaction hasn't been done yet. However, there's no need - // to check since all keys in them will either be from c.startLevel or - // c.outputLevel, both levels having their data overlap already tested - // negative (else we'd have returned earlier). + // An alternative approach would be to cancel these compactions and proceed + // with an ingest-time split on this level if necessary. However, compaction + // cancellation can result in significant wasted effort and is best avoided + // unless necessary. overlaps := false for c := range compactions { if c.outputLevel == nil || level != c.outputLevel.level { @@ -959,9 +991,10 @@ func ingestTargetLevel( } if !overlaps { targetLevel = level + splitFile = candidateSplitFile } } - return targetLevel, nil + return targetLevel, splitFile, nil } // Ingest ingests a set of sstables into the DB. Ingestion of the files is @@ -1821,7 +1854,124 @@ type ingestTargetLevelFunc func( baseLevel int, compactions map[*compaction]struct{}, meta *fileMetadata, -) (int, error) + suggestSplit bool, +) (int, *fileMetadata, error) + +type ingestSplitFile struct { + // ingestFile is the file being ingested. + ingestFile *fileMetadata + // splitFile is the file that needs to be split to allow ingestFile to slot + // into `level` level. + splitFile *fileMetadata + // The level where ingestFile will go (and where splitFile already is). 
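+ // Note: this is never L0. ingestTargetLevel only suggests a split while
+ // probing levels at the base level and below; L0 permits overlapping
+ // sstables, so no split is needed to place a file there.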
+ level int +} + +// ingestSplit splits files specified in `files` and updates ve in-place to +// account for existing files getting split into two virtual sstables. The map +// `replacedFiles` contains an in-progress map of all files that have been +// replaced with new virtual sstables in this version edit so far, which is also +// updated in-place. +// +// d.mu as well as the manifest lock must be held when calling this method. +func (d *DB) ingestSplit( + ve *versionEdit, + updateMetrics func(*fileMetadata, int, []newFileEntry), + files []ingestSplitFile, + replacedFiles map[base.FileNum][]newFileEntry, +) error { + for _, s := range files { + // replacedFiles can be thought of as a tree, where we start iterating with + // s.splitFile and run its fileNum through replacedFiles, then find which of + // the replaced files overlaps with s.ingestFile, which becomes the new + // splitFile, then we check splitFile's replacements in replacedFiles again + // for overlap with s.ingestFile, and so on until we either can't find the + // current splitFile in replacedFiles (i.e. that's the file that now needs to + // be split), or we don't find a file that overlaps with s.ingestFile, which + // means a prior ingest split already produced enough room for s.ingestFile + // to go into this level without necessitating another ingest split. + splitFile := s.splitFile + for splitFile != nil { + replaced, ok := replacedFiles[splitFile.FileNum] + if !ok { + break + } + updatedSplitFile := false + for i := range replaced { + if replaced[i].Meta.Overlaps(d.cmp, s.ingestFile.Smallest.UserKey, s.ingestFile.Largest.UserKey, s.ingestFile.Largest.IsExclusiveSentinel()) { + if updatedSplitFile { + // This should never happen because the earlier ingestTargetLevel + // function only finds split file candidates that are guaranteed to + // have no data overlap, only boundary overlap. See the comments + // in that method to see the definitions of data vs boundary + // overlap. That, plus the fact that files in `replaced` are + // guaranteed to have file bounds that are tight on user keys + // (as that's what `d.excise` produces), means that the only case + // where we overlap with two or more files in `replaced` is if we + // actually had data overlap all along, or if the ingestion files + // were overlapping, either of which is an invariant violation. + panic("updated with two files in ingestSplit") + } + splitFile = replaced[i].Meta + updatedSplitFile = true + } + } + if !updatedSplitFile { + // None of the replaced files overlapped with the file being ingested. + // This can happen if we've already excised a span overlapping with + // this file, or if we have consecutive ingested files that can slide + // within the same gap between keys in an existing file. For instance, + // if an existing file has keys a and g and we're ingesting b-c, d-e, + // the first loop iteration will split the existing file into one that + // ends in a and another that starts at g, and the second iteration will + // fall into this case and require no splitting. + // + // No splitting necessary. + splitFile = nil + } + } + if splitFile == nil { + continue + } + // NB: excise operates on [start, end). We're splitting at [start, end] + // (assuming !s.ingestFile.Largest.IsExclusiveSentinel()). 
The conflation + // of exclusive vs inclusive end bounds should not make a difference here + // as we're guaranteed to not have any data overlap between splitFile and + // s.ingestFile, so panic if we do see a newly added file with an endKey + // equalling s.ingestFile.Largest, and !s.ingestFile.Largest.IsExclusiveSentinel() + added, err := d.excise(KeyRange{Start: s.ingestFile.Smallest.UserKey, End: s.ingestFile.Largest.UserKey}, splitFile, ve, s.level) + if err != nil { + return err + } + if _, ok := ve.DeletedFiles[deletedFileEntry{ + Level: s.level, + FileNum: splitFile.FileNum, + }]; !ok { + panic("did not split file that was expected to be split") + } + replacedFiles[splitFile.FileNum] = added + for i := range added { + if s.ingestFile.Overlaps(d.cmp, added[i].Meta.Smallest.UserKey, added[i].Meta.Largest.UserKey, added[i].Meta.Largest.IsExclusiveSentinel()) { + panic("ingest-time split produced a file that overlaps with ingested file") + } + } + updateMetrics(splitFile, s.level, added) + } + // Flatten the version edit by removing any entries from ve.NewFiles that + // are also in ve.DeletedFiles. + newNewFiles := ve.NewFiles[:0] + for i := range ve.NewFiles { + fn := ve.NewFiles[i].Meta.FileNum + deEntry := deletedFileEntry{Level: ve.NewFiles[i].Level, FileNum: fn} + if _, ok := ve.DeletedFiles[deEntry]; ok { + delete(ve.DeletedFiles, deEntry) + } else { + newNewFiles = append(newNewFiles, ve.NewFiles[i]) + } + } + ve.NewFiles = newNewFiles + return nil +} func (d *DB) ingestApply( jobID int, @@ -1836,7 +1986,7 @@ func (d *DB) ingestApply( ve := &versionEdit{ NewFiles: make([]newFileEntry, lr.fileCount), } - if exciseSpan.Valid() { + if exciseSpan.Valid() || (d.opts.Experimental.IngestSplit != nil && d.opts.Experimental.IngestSplit()) { ve.DeletedFiles = map[manifest.DeletedFileEntry]*manifest.FileMetadata{} } metrics := make(map[int]*LevelMetrics) @@ -1861,9 +2011,17 @@ func (d *DB) ingestApply( } } + shouldIngestSplit := d.opts.Experimental.IngestSplit != nil && + d.opts.Experimental.IngestSplit() && d.FormatMajorVersion() >= FormatVirtualSSTables current := d.mu.versions.currentVersion() baseLevel := d.mu.versions.picker.getBaseLevel() iterOps := IterOptions{logger: d.opts.Logger} + // filesToSplit is a list where each element is a pair consisting of a file + // being ingested and a file being split to make room for an ingestion into + // that level. Each ingested file will appear at most once in this list. It + // is possible for split files to appear twice in this list. + filesToSplit := make([]ingestSplitFile, 0) + checkCompactions := false for i := 0; i < lr.fileCount; i++ { // Determine the lowest level in the LSM for which the sstable doesn't // overlap any existing files in the level. @@ -1896,6 +2054,7 @@ func (d *DB) ingestApply( if externalFile { ve.CreatedBackingTables = append(ve.CreatedBackingTables, m.FileBacking) } + var splitFile *fileMetadata if exciseSpan.Valid() && exciseSpan.Contains(d.cmp, m.Smallest) && exciseSpan.Contains(d.cmp, m.Largest) { // This file fits perfectly within the excise span. We can slot it at // L6, or sharedLevelsStart - 1 if we have shared files. @@ -1908,7 +2067,31 @@ func (d *DB) ingestApply( f.Level = 6 } } else { - f.Level, err = findTargetLevel(d.newIters, d.tableNewRangeKeyIter, iterOps, d.cmp, current, baseLevel, d.mu.compact.inProgress, m) + // TODO(bilal): findTargetLevel does disk IO (reading files for data + // overlap) even though we're holding onto d.mu. Consider unlocking + // d.mu while we do this. 
We already hold versions.logLock so we should + // not see any version applications while we're at this. The one + // complication here would be pulling out the mu.compact.inProgress + // check from findTargetLevel, as that requires d.mu to be held. + f.Level, splitFile, err = findTargetLevel( + d.newIters, d.tableNewRangeKeyIter, iterOps, d.cmp, current, baseLevel, d.mu.compact.inProgress, m, shouldIngestSplit) + } + + if splitFile != nil { + if invariants.Enabled { + if lf := current.Levels[f.Level].Find(d.cmp, splitFile); lf == nil { + panic("splitFile returned is not in level it should be") + } + } + // We take advantage of the fact that we won't drop the db mutex + // between now and the call to logAndApply. So, no files should + // get added to a new in-progress compaction at this point. We can + // avoid having to iterate on in-progress compactions to cancel them + // if none of the files being split have a compacting state. + if splitFile.IsCompacting() { + checkCompactions = true + } + filesToSplit = append(filesToSplit, ingestSplitFile{ingestFile: m, splitFile: splitFile, level: f.Level}) } } if err != nil { @@ -1926,6 +2109,26 @@ func (d *DB) ingestApply( levelMetrics.BytesIngested += m.Size levelMetrics.TablesIngested++ } + // replacedFiles maps files excised due to exciseSpan (or splitFiles returned + // by ingestTargetLevel), to files that were created to replace it. This map + // is used to resolve references to split files in filesToSplit, as it is + // possible for a file that we want to split to no longer exist or have a + // newer fileMetadata due to a split induced by another ingestion file, or an + // excise. + replacedFiles := make(map[base.FileNum][]newFileEntry) + updateLevelMetricsOnExcise := func(m *fileMetadata, level int, added []newFileEntry) { + levelMetrics := metrics[level] + if levelMetrics == nil { + levelMetrics = &LevelMetrics{} + metrics[level] = levelMetrics + } + levelMetrics.NumFiles-- + levelMetrics.Size -= int64(m.Size) + for i := range added { + levelMetrics.NumFiles++ + levelMetrics.Size += int64(added[i].Meta.Size) + } + } if exciseSpan.Valid() { // Iterate through all levels and find files that intersect with exciseSpan. // @@ -1947,7 +2150,7 @@ func (d *DB) ingestApply( iter := overlaps.Iter() for m := iter.First(); m != nil; m = iter.Next() { - excised, err := d.excise(exciseSpan, m, ve, level) + newFiles, err := d.excise(exciseSpan, m, ve, level) if err != nil { return nil, err } @@ -1959,19 +2162,19 @@ func (d *DB) ingestApply( // We did not excise this file. continue } - levelMetrics := metrics[level] - if levelMetrics == nil { - levelMetrics = &LevelMetrics{} - metrics[level] = levelMetrics - } - levelMetrics.NumFiles-- - levelMetrics.Size -= int64(m.Size) - for i := range excised { - levelMetrics.NumFiles++ - levelMetrics.Size += int64(excised[i].Meta.Size) - } + replacedFiles[m.FileNum] = newFiles + updateLevelMetricsOnExcise(m, level, newFiles) } } + } + if len(filesToSplit) > 0 { + // For the same reasons as the above call to excise, we hold the db mutex + // while calling this method. 
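+ // ingestSplit mutates ve in place: each split appends the resulting
+ // virtual sstables to ve.NewFiles, records the file being split in
+ // ve.DeletedFiles, and updates replacedFiles so that later splits can
+ // resolve split files that have themselves been replaced.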
+ if err := d.ingestSplit(ve, updateLevelMetricsOnExcise, filesToSplit, replacedFiles); err != nil { + return nil, err + } + } + if len(filesToSplit) > 0 || exciseSpan.Valid() { for c := range d.mu.compact.inProgress { if c.versionEditApplied { continue @@ -1986,22 +2189,41 @@ func (d *DB) ingestApply( if exciseSpan.OverlapsInternalKeyRange(d.cmp, c.smallest, c.largest) { c.cancel.Store(true) } + // Check if this compaction's inputs have been replaced due to an + // ingest-time split. In that case, cancel the compaction as a newly picked + // compaction would need to include any new files that slid in between + // previously-existing files. Note that we cancel any compaction that has a + // file that was ingest-split as an input, even if it started before this + // ingestion. + if checkCompactions { + for i := range c.inputs { + iter := c.inputs[i].files.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if _, ok := replacedFiles[f.FileNum]; ok { + c.cancel.Store(true) + break + } + } + } + } } // Check for any EventuallyFileOnlySnapshots that could be watching for // an excise on this span. - for s := d.mu.snapshots.root.next; s != &d.mu.snapshots.root; s = s.next { - if s.efos == nil { - continue - } - efos := s.efos - // TODO(bilal): We can make this faster by taking advantage of the sorted - // nature of protectedRanges to do a sort.Search, or even maintaining a - // global list of all protected ranges instead of having to peer into every - // snapshot. - for i := range efos.protectedRanges { - if efos.protectedRanges[i].OverlapsKeyRange(d.cmp, exciseSpan) { - efos.excised.Store(true) - break + if exciseSpan.Valid() { + for s := d.mu.snapshots.root.next; s != &d.mu.snapshots.root; s = s.next { + if s.efos == nil { + continue + } + efos := s.efos + // TODO(bilal): We can make this faster by taking advantage of the sorted + // nature of protectedRanges to do a sort.Search, or even maintaining a + // global list of all protected ranges instead of having to peer into every + // snapshot. 
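+ // An excise deletes keys at sequence numbers that may be visible
+ // to this snapshot. If the excised span overlaps one of the
+ // snapshot's protected ranges, flag the EFOS as excised so it does
+ // not transition to a file-only snapshot that would be missing
+ // those keys.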
+ for i := range efos.protectedRanges { + if efos.protectedRanges[i].OverlapsKeyRange(d.cmp, exciseSpan) { + efos.excised.Store(true) + break + } } } } diff --git a/ingest_test.go b/ingest_test.go index a002fff707..6c539ed45d 100644 --- a/ingest_test.go +++ b/ingest_test.go @@ -1957,16 +1957,28 @@ func TestIngestTargetLevel(t *testing.T) { case "target": var buf bytes.Buffer + suggestSplit := false + for _, cmd := range td.CmdArgs { + switch cmd.Key { + case "suggest-split": + suggestSplit = true + } + } for _, target := range strings.Split(td.Input, "\n") { meta := parseMeta(target) - level, err := ingestTargetLevel( + level, overlapFile, err := ingestTargetLevel( d.newIters, d.tableNewRangeKeyIter, IterOptions{logger: d.opts.Logger}, d.cmp, d.mu.versions.currentVersion(), 1, d.mu.compact.inProgress, meta, + suggestSplit, ) if err != nil { return err.Error() } - fmt.Fprintf(&buf, "%d\n", level) + if overlapFile != nil { + fmt.Fprintf(&buf, "%d (split file: %s)\n", level, overlapFile.FileNum) + } else { + fmt.Fprintf(&buf, "%d\n", level) + } } return buf.String() @@ -1984,7 +1996,7 @@ func TestIngest(t *testing.T) { require.NoError(t, d.Close()) }() - reset := func() { + reset := func(split bool) { if d != nil { require.NoError(t, d.Close()) } @@ -2001,6 +2013,9 @@ func TestIngest(t *testing.T) { }}, FormatMajorVersion: internalFormatNewest, } + opts.Experimental.IngestSplit = func() bool { + return split + } // Disable automatic compactions because otherwise we'll race with // delete-only compactions triggered by ingesting range tombstones. opts.DisableAutomaticCompactions = true @@ -2009,12 +2024,21 @@ func TestIngest(t *testing.T) { d, err = Open("", opts) require.NoError(t, err) } - reset() + reset(false /* split */) datadriven.RunTest(t, "testdata/ingest", func(t *testing.T, td *datadriven.TestData) string { switch td.Cmd { case "reset": - reset() + split := false + for _, cmd := range td.CmdArgs { + switch cmd.Key { + case "enable-split": + split = true + default: + return fmt.Sprintf("unexpected key: %s", cmd.Key) + } + } + reset(split) return "" case "batch": b := d.NewIndexedBatch() diff --git a/options.go b/options.go index 75281ce00b..eee41c9741 100644 --- a/options.go +++ b/options.go @@ -550,6 +550,11 @@ type Options struct { // concurrency slots as determined by the two options is chosen. CompactionDebtConcurrency uint64 + // IngestSplit, if it returns true, allows for ingest-time splitting of + // existing sstables into two virtual sstables to allow ingestion sstables to + // slot into a lower level than they otherwise would have. + IngestSplit func() bool + // ReadCompactionRate controls the frequency of read triggered // compactions by adjusting `AllowedSeeks` in manifest.FileMetadata: // diff --git a/testdata/ingest b/testdata/ingest index 37ffc9c8b5..5d292a9d50 100644 --- a/testdata/ingest +++ b/testdata/ingest @@ -907,3 +907,272 @@ lsm 000006:[f#12,SET-h#12,SET] 000010:[s#16,RANGEKEYDEL-x#inf,RANGEKEYDEL] 000009:[x#15,SET-y#15,SET] + +reset enable-split +---- + +build ext10 +set a foo +set e bar +---- + +ingest ext10 +---- + +lsm +---- +6: + 000004:[a#10,SET-e#10,SET] + +# The below ingestion should split one existing file. + +build ext11 +set b foobar +set d foobar +---- + +ingest ext11 +---- + +lsm +---- +6: + 000006:[a#10,SET-a#10,SET] + 000005:[b#11,SET-d#11,SET] + 000007:[e#10,SET-e#10,SET] + +iter +first +next +next +next +---- +a: (foo, .) +b: (foobar, .) +d: (foobar, .) +e: (bar, .) + +# This ingestion should not split any files due to data overlap. 
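+# (ext12 spans c-e, and keys d and e already exist in L6 within that span,
+# so this is true data overlap rather than pure boundary overlap; the new
+# sstable has to go to L0.)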
+
+build ext12
+set c foobar
+set e baz
+----
+
+ingest ext12
+----
+
+lsm
+----
+0.0:
+  000008:[c#12,SET-e#12,SET]
+6:
+  000006:[a#10,SET-a#10,SET]
+  000005:[b#11,SET-d#11,SET]
+  000007:[e#10,SET-e#10,SET]
+
+# The below ingestion should fall through one existing file and split another
+# file.
+
+build ext13
+set cc foo
+set ccc foooo
+----
+
+ingest ext13
+----
+
+lsm
+----
+0.0:
+  000008:[c#12,SET-e#12,SET]
+6:
+  000006:[a#10,SET-a#10,SET]
+  000010:[b#11,SET-b#11,SET]
+  000009:[cc#13,SET-ccc#13,SET]
+  000011:[d#11,SET-d#11,SET]
+  000007:[e#10,SET-e#10,SET]
+
+iter
+seek-ge c
+next
+next
+next
+next
+----
+c: (foobar, .)
+cc: (foo, .)
+ccc: (foooo, .)
+d: (foobar, .)
+e: (baz, .)
+
+# Ingestion splitting doesn't kick in at L0.
+
+build ext14
+set d updated
+set dd new
+----
+
+ingest ext14
+----
+
+lsm
+----
+0.1:
+  000012:[d#14,SET-dd#14,SET]
+0.0:
+  000008:[c#12,SET-e#12,SET]
+6:
+  000006:[a#10,SET-a#10,SET]
+  000010:[b#11,SET-b#11,SET]
+  000009:[cc#13,SET-ccc#13,SET]
+  000011:[d#11,SET-d#11,SET]
+  000007:[e#10,SET-e#10,SET]
+
+iter
+seek-lt d
+next
+next
+next
+next
+----
+ccc: (foooo, .)
+d: (updated, .)
+dd: (new, .)
+e: (baz, .)
+.
+
+# Multi-sstable ingestion batches. This exercises logic to find the appropriate
+# file to split for each newly ingested file, as we will be repeatedly splitting
+# files into smaller virtual files.
+
+reset enable-split
+----
+
+build ext10
+set a foo
+set e bar
+set g baz
+----
+
+ingest ext10
+----
+
+lsm
+----
+6:
+  000004:[a#10,SET-g#10,SET]
+
+build ext11
+set b foobar
+set c foobar
+----
+
+build ext12
+set cc foobar
+set d foobar
+----
+
+# This ingestion should slide into the same gap between keys in ext10.
+
+ingest ext11 ext12
+----
+
+lsm
+----
+6:
+  000007:[a#10,SET-a#10,SET]
+  000005:[b#11,SET-c#11,SET]
+  000006:[cc#12,SET-d#12,SET]
+  000008:[e#10,SET-g#10,SET]
+
+# A virtual sstable produced from an ingest split can be ingest split again.
+
+build ext13
+set ee foooo
+set f bar
+----
+
+ingest ext13
+----
+
+lsm
+----
+6:
+  000007:[a#10,SET-a#10,SET]
+  000005:[b#11,SET-c#11,SET]
+  000006:[cc#12,SET-d#12,SET]
+  000010:[e#10,SET-e#10,SET]
+  000009:[ee#13,SET-f#13,SET]
+  000011:[g#10,SET-g#10,SET]
+
+reset enable-split
+----
+
+build ext10
+set a foo
+set e bar
+set g baz
+----
+
+ingest ext10
+----
+
+lsm
+----
+6:
+  000004:[a#10,SET-g#10,SET]
+
+build ext11
+set b foobar
+set c foobar
+----
+
+build ext12
+set cc foobar
+set d foobar
+----
+
+build ext13
+set ee foooo
+set f bar
+----
+
+# This ingestion should split ext10 twice, and land two files on one side
+# of a key in it, and another file on the other side of it.
+
+ingest ext11 ext12 ext13
+----
+
+lsm
+----
+6:
+  000008:[a#10,SET-a#10,SET]
+  000005:[b#11,SET-c#11,SET]
+  000006:[cc#12,SET-d#12,SET]
+  000010:[e#10,SET-e#10,SET]
+  000007:[ee#13,SET-f#13,SET]
+  000011:[g#10,SET-g#10,SET]
+
+iter
+first
+next
+next
+next
+next
+next
+next
+next
+next
+next
+----
+a: (foo, .)
+b: (foobar, .)
+c: (foobar, .)
+cc: (foobar, .)
+d: (foobar, .)
+e: (bar, .)
+ee: (foooo, .)
+f: (bar, .)
+g: (baz, .)
+.
diff --git a/testdata/ingest_target_level b/testdata/ingest_target_level
index 66d8d65924..e6b8edf1a3 100644
--- a/testdata/ingest_target_level
+++ b/testdata/ingest_target_level
@@ -246,3 +246,79 @@ target
 rkey:a-c
 ----
 4
+
+# Cases with boundary overlap and no data overlap. With suggest-split off
+# we get a target level of L5, but with suggest-split on, we get a suggested
+# file split that allows the ingested file to be placed in L6.
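+#
+# (In the define below, 000004 spans a-d and 000005 spans f-k; ingesting b-c
+# boundary-overlaps only 000004, and neither of its keys a or d falls inside
+# b-c, so 000004 is returned as the file to split.)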
+define
+L6
+  a.SET.2:2
+  d.SET.3:3
+L6
+  f.SET.4:4
+  k.SET.6:6
+----
+6:
+  000004:[a#2,SET-d#3,SET]
+  000005:[f#4,SET-k#6,SET]
+
+target
+b-c
+e-g
+----
+5
+5
+
+target suggest-split
+b-c
+e-g
+----
+6 (split file: 000004)
+5
+
+target suggest-split
+g-i
+----
+6 (split file: 000005)
+
+# suggest-split recognizes and avoids in-progress compactions.
+
+define
+L6
+  a.SET.2:2
+  d.SET.3:3
+L6
+  f.SET.4:4
+  k.SET.6:6
+  compact:f-k
+----
+6:
+  000004:[a#2,SET-d#3,SET]
+  000005:[f#4,SET-k#6,SET]
+
+target suggest-split
+g-i
+----
+5
+
+# Ingestion splitting correctly recognizes data overlap in L6, and suggests
+# split in L5.
+
+define
+L5
+  a.SET.2:2
+  e.SET.3:3
+L6
+  c.SET.1:1
+  k.SET.1:1
+----
+5:
+  000004:[a#2,SET-e#3,SET]
+6:
+  000005:[c#1,SET-k#1,SET]
+
+target suggest-split
+b-c
+----
+5 (split file: 000004)
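
For context, a minimal sketch of how a caller might opt into this behavior once the change lands. This is hypothetical application code, not part of the diff: the option and the FormatVirtualSSTables gate are the ones added in options.go and compaction.go above, while the "demo" directory and "ext.sst" path are placeholders for a real store and a pre-built sstable.

	package main

	import (
		"log"

		"github.com/cockroachdb/pebble"
	)

	func main() {
		opts := &pebble.Options{
			// Ingest-time splitting relies on virtual sstables, so the
			// store must be at FormatVirtualSSTables or newer.
			FormatMajorVersion: pebble.FormatVirtualSSTables,
		}
		// The knob is a func so callers can wire it to a setting that may
		// change at runtime; both call sites also gate on the format version.
		opts.Experimental.IngestSplit = func() bool { return true }

		db, err := pebble.Open("demo", opts)
		if err != nil {
			log.Fatal(err)
		}
		defer db.Close()

		// With the option enabled, an ingest that only boundary-overlaps a
		// single existing sstable may split that sstable into two virtual
		// sstables instead of being forced into a higher level.
		if err := db.Ingest([]string{"ext.sst"}); err != nil {
			log.Fatal(err)
		}
	}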