From d5eaf3c1d33504dffe507937346a4cc2da716c56 Mon Sep 17 00:00:00 2001
From: Bilal Akhtar <bilal@cockroachlabs.com>
Date: Wed, 31 May 2023 17:04:25 -0400
Subject: [PATCH] db: add ingest-time splitting of ssts into virtual ones

Currently, if we identify boundary overlap in a level
during ingest target level calculation, but no data overlap,
we are forced to find a target level above the file we saw
the overlap with (if we can't fall below it, such as if the
existing file is in L6, which happens commonly).

This change takes advantage of virtual sstables to split
existing sstables into two virtual sstables when an ingested
sstable would be able to go into the same level had the sstables
been split that way to begin with. Doing this split reduces a
lot of write-amp as it avoids us from having to compact the
newly-ingested sstable with the sstable it boundary-overlapped with.

Biggest part of #1683. First commit is #2538, which this shares
a lot of logic with (mostly just the excise function).
---
 compaction.go                |  54 +++++++-
 data_test.go                 |   5 +-
 ingest.go                    | 231 +++++++++++++++++++++++++++++++----
 ingest_test.go               |  34 +++++-
 options.go                   |   5 +
 testdata/ingest              | 134 ++++++++++++++++++++
 testdata/ingest_target_level |  76 ++++++++++++
 7 files changed, 506 insertions(+), 33 deletions(-)

diff --git a/compaction.go b/compaction.go
index d9121aae2a..ec792be7cc 100644
--- a/compaction.go
+++ b/compaction.go
@@ -1880,15 +1880,27 @@ func (d *DB) runIngestFlush(c *compaction) (*manifest.VersionEdit, error) {
 	ve := &versionEdit{}
 	var level int
 	var err error
+	var fileToSplit *fileMetadata
+	var ingestSplitFiles []ingestSplitFile
 	for _, file := range c.flushing[0].flushable.(*ingestedFlushable).files {
-		level, err = ingestTargetLevel(
+		suggestSplit := d.opts.Experimental.IngestSplit != nil && d.opts.Experimental.IngestSplit() &&
+			d.FormatMajorVersion() >= ExperimentalFormatVirtualSSTables
+		level, fileToSplit, err = ingestTargetLevel(
 			d.newIters, d.tableNewRangeKeyIter, iterOpts, d.cmp,
 			c.version, baseLevel, d.mu.compact.inProgress, file.FileMetadata,
+			suggestSplit,
 		)
 		if err != nil {
 			return nil, err
 		}
 		ve.NewFiles = append(ve.NewFiles, newFileEntry{Level: level, Meta: file.FileMetadata})
+		if fileToSplit != nil {
+			ingestSplitFiles = append(ingestSplitFiles, ingestSplitFile{
+				ingestFile: file.FileMetadata,
+				splitFile:  fileToSplit,
+				level:      level,
+			})
+		}
 		levelMetrics := c.metrics[level]
 		if levelMetrics == nil {
 			levelMetrics = &LevelMetrics{}
@@ -1900,6 +1912,28 @@ func (d *DB) runIngestFlush(c *compaction) (*manifest.VersionEdit, error) {
 		levelMetrics.TablesIngested++
 	}
 
+	updateLevelMetricsOnExcise := func(m *fileMetadata, level int, added []newFileEntry) {
+		levelMetrics := c.metrics[level]
+		if levelMetrics == nil {
+			levelMetrics = &LevelMetrics{}
+			c.metrics[level] = levelMetrics
+		}
+		levelMetrics.NumFiles--
+		levelMetrics.Size -= int64(m.Size)
+		for i := range added {
+			levelMetrics.NumFiles++
+			levelMetrics.Size += int64(added[i].Meta.Size)
+		}
+	}
+
+	if len(ingestSplitFiles) > 0 {
+		ve.DeletedFiles = make(map[manifest.DeletedFileEntry]*manifest.FileMetadata)
+		replacedFiles := make(map[base.FileNum][]newFileEntry)
+		if err := d.ingestSplit(ve, updateLevelMetricsOnExcise, ingestSplitFiles, KeyRange{}, replacedFiles); err != nil {
+			return nil, err
+		}
+	}
+
 	return ve, nil
 }
 
@@ -2069,6 +2103,24 @@ func (d *DB) flush1() (bytesFlushed uint64, err error) {
 					metrics.BytesIn += d.mu.mem.queue[i].logSize
 				}
 			}
+		} else if len(ve.DeletedFiles) > 0 {
+			// c.kind == compactionKindIngestedFlushable && we have deleted files due
+			// to ingest-time splits.
+			//
+			// Iterate through all other compactions, and check if their inputs have
+			// been replaced due to an ingest-time split. In that case, cancel the
+			// compaction.
+			for c2 := range d.mu.compact.inProgress {
+				for i := range c2.inputs {
+					iter := c2.inputs[i].files.Iter()
+					for f := iter.First(); f != nil; f = iter.Next() {
+						if _, ok := ve.DeletedFiles[deletedFileEntry{FileNum: f.FileNum, Level: c2.inputs[i].level}]; ok {
+							c2.cancel.Store(true)
+							break
+						}
+					}
+				}
+			}
 		}
 		err = d.mu.versions.logAndApply(jobID, ve, c.metrics, false, /* forceRotation */
 			func() []compactionInfo { return d.getInProgressCompactionInfoLocked(c) })
diff --git a/data_test.go b/data_test.go
index eb8684170d..8936f238c5 100644
--- a/data_test.go
+++ b/data_test.go
@@ -1354,8 +1354,9 @@ func runForceIngestCmd(td *datadriven.TestData, d *DB) error {
 		int,
 		map[*compaction]struct{},
 		*fileMetadata,
-	) (int, error) {
-		return level, nil
+		bool,
+	) (int, *fileMetadata, error) {
+		return level, nil, nil
 	}, nil /* shared */, KeyRange{}, nil /* external */)
 	return err
 }
diff --git a/ingest.go b/ingest.go
index 3577e2ece7..934ebd3041 100644
--- a/ingest.go
+++ b/ingest.go
@@ -773,6 +773,10 @@ func overlapWithIterator(
 	return computeOverlapWithSpans(*rangeDelIter)
 }
 
+// ingestTargetLevel returns the target level for a file being ingested.
+// If suggestSplit is true, it accounts for ingest-time splitting as part of
+// its target level calculation, and if a split candidate is found, that file
+// is returned as the splitFile.
 func ingestTargetLevel(
 	newIters tableNewIters,
 	newRangeKeyIter keyspan.TableNewSpanIter,
@@ -782,7 +786,8 @@ func ingestTargetLevel(
 	baseLevel int,
 	compactions map[*compaction]struct{},
 	meta *fileMetadata,
-) (int, error) {
+	suggestSplit bool,
+) (targetLevel int, splitFile *fileMetadata, err error) {
 	// Find the lowest level which does not have any files which overlap meta. We
 	// search from L0 to L6 looking for whether there are any files in the level
 	// which overlap meta. We want the "lowest" level (where lower means
@@ -797,6 +802,12 @@ func ingestTargetLevel(
 	//   violate the sequence number invariant.
 	// - no file boundary overlap with level i, since that will violate the
 	//   invariant that files do not overlap in levels i > 0.
+	//   - if there is only a file overlap at a given level, and no data overlap,
+	//     we can still slot a file at that level. We return the fileMetadata with
+	//     which we have file boundary overlap (must be only one file by definition, as
+	//     sstable bounds are tight on user keys) and the caller is expected to split
+	//     that sstable into two virtual sstables, allowing this file to go into that
+	//     level.
 	//
 	// The file boundary overlap check is simpler to conceptualize. Consider the
 	// following example, in which the ingested file lies completely before or
@@ -841,12 +852,10 @@ func ingestTargetLevel(
 	// existing point that falls within the ingested table bounds as being "data
 	// overlap".
 
-	targetLevel := 0
-
 	// This assertion implicitly checks that we have the current version of
 	// the metadata.
 	if v.L0Sublevels == nil {
-		return 0, errors.AssertionFailedf("could not read L0 sublevels")
+		return 0, nil, errors.AssertionFailedf("could not read L0 sublevels")
 	}
 	// Check for overlap over the keys of L0 by iterating over the sublevels.
 	for subLevel := 0; subLevel < len(v.L0SublevelFiles); subLevel++ {
@@ -872,10 +881,10 @@ func ingestTargetLevel(
 		err := iter.Close() // Closes range del iter as well.
 		err = firstError(err, levelIter.Close())
 		if err != nil {
-			return 0, err
+			return 0, nil, err
 		}
 		if overlap {
-			return targetLevel, nil
+			return targetLevel, nil, nil
 		}
 	}
 
@@ -902,17 +911,32 @@ func ingestTargetLevel(
 		err := levelIter.Close() // Closes range del iter as well.
 		err = firstError(err, rkeyLevelIter.Close())
 		if err != nil {
-			return 0, err
+			return 0, nil, err
 		}
 		if overlap {
-			return targetLevel, nil
+			return targetLevel, splitFile, nil
 		}
 
 		// Check boundary overlap.
+		splitFile = nil
 		boundaryOverlaps := v.Overlaps(level, cmp, meta.Smallest.UserKey,
 			meta.Largest.UserKey, meta.Largest.IsExclusiveSentinel())
 		if !boundaryOverlaps.Empty() {
-			continue
+			// We are already guaranteed to not have any data overlaps with files
+			// in boundaryOverlaps, otherwise we'd have gone into the above if
+			// statements. Use this, plus boundaryOverlaps.Len() == 1 to detect for
+			// the case where we can slot this file into the current level despite
+			// a boundary overlap, by splitting one existing file into two virtual
+			// sstables.
+			if suggestSplit && boundaryOverlaps.Len() == 1 {
+				iter := boundaryOverlaps.Iter()
+				splitFile = iter.First()
+			} else {
+				// We either don't want to suggest ingest-time splits (i.e.
+				// !suggestSplit), or we boundary-overlapped with more than one file.
+				splitFile = nil
+				continue
+			}
 		}
 
 		// Check boundary overlap with any ongoing compactions.
@@ -935,9 +959,13 @@ func ingestTargetLevel(
 		}
 		if !overlaps {
 			targetLevel = level
+		} else {
+			// We found an overlap with a compaction. Splitting a file won't resolve
+			// this, so reset splitFile.
+			splitFile = nil
 		}
 	}
-	return targetLevel, nil
+	return targetLevel, splitFile, nil
 }
 
 // Ingest ingests a set of sstables into the DB. Ingestion of the files is
@@ -1796,7 +1824,95 @@ type ingestTargetLevelFunc func(
 	baseLevel int,
 	compactions map[*compaction]struct{},
 	meta *fileMetadata,
-) (int, error)
+	suggestSplit bool,
+) (int, *fileMetadata, error)
+
+type ingestSplitFile struct {
+	// ingestFile is the file being ingested.
+	ingestFile *fileMetadata
+	// splitFile is the file that needs to be split to allow ingestFile to slot
+	// into `level` level.
+	splitFile *fileMetadata
+	// The level where ingestFile will go (and where splitFile already is).
+	level int
+}
+
+// ingestSplit splits files specified in `files` and updates ve in-place to
+// account for existing files getting split into two virtual sstables. The map
+// `replacedFiles` contains an in-progress map of all files that have been
+// replaced with new virtual sstables in this version edit so far, which is also
+// updated in-place.
+//
+// d.mu as well as the manifest lock must be held when calling this method.
+func (d *DB) ingestSplit(
+	ve *versionEdit,
+	updateMetrics func(*fileMetadata, int, []newFileEntry),
+	files []ingestSplitFile,
+	exciseSpan KeyRange,
+	replacedFiles map[base.FileNum][]newFileEntry,
+) error {
+	for _, s := range files {
+		// replacedFiles can be thought of as a tree, where we start iterating with
+		// s.splitFile and run its fileNum through replacedFiles, then find which
+		// of the replaced files overlaps with s.ingestFile, check its
+		// replacements in replacedFiles again for overlap with s.ingestFile, and
+		// so on until we can't find the current splitFile in replacedFiles.
+		splitFile := s.splitFile
+		for splitFile != nil {
+			replaced, ok := replacedFiles[splitFile.FileNum]
+			if !ok {
+				break
+			}
+			updatedSplitFile := false
+			for i := range replaced {
+				if replaced[i].Meta.Overlaps(d.cmp, s.ingestFile.Smallest.UserKey, s.ingestFile.Largest.UserKey, s.ingestFile.Largest.IsExclusiveSentinel()) {
+					if updatedSplitFile {
+						panic("updated with two files in ingestSplit")
+					}
+					splitFile = replaced[i].Meta
+					updatedSplitFile = true
+				}
+			}
+			if !updatedSplitFile {
+				// None of the replaced files overlapped with the file being ingested.
+				// This is only okay if we have an excise span that overlaps with
+				// the ingested file.
+				if !exciseSpan.Valid() || !s.ingestFile.Overlaps(d.cmp, exciseSpan.Start, exciseSpan.End, true /* exclusiveEnd */) {
+					panic("could not find a file to split that overlaps with ingested file")
+				}
+				// No file to split.
+				splitFile = nil
+			}
+		}
+		if splitFile == nil {
+			continue
+		}
+		// NB: excise operates on [start, end). We're splitting at [start, end]
+		// (assuming !s.ingestFile.Largest.IsExclusiveSentinel()). The conflation
+		// of exclusive vs inclusive end bounds should not make a difference here
+		// as we're guaranteed to not have any data overlap between splitFile and
+		// s.ingestFile, so panic if we do see a newly added file with an endKey
+		// equalling s.ingestFile.Largest, and !s.ingestFile.Largest.IsExclusiveSentinel()
+		added, err := d.excise(KeyRange{Start: s.ingestFile.Smallest.UserKey, End: s.ingestFile.Largest.UserKey}, splitFile, ve, s.level)
+		if err != nil {
+			return err
+		}
+		if _, ok := ve.DeletedFiles[deletedFileEntry{
+			Level:   s.level,
+			FileNum: splitFile.FileNum,
+		}]; !ok {
+			panic("did not split file that was expected to be split")
+		}
+		replacedFiles[splitFile.FileNum] = added
+		for i := range added {
+			if s.ingestFile.Overlaps(d.cmp, added[i].Meta.Smallest.UserKey, added[i].Meta.Largest.UserKey, added[i].Meta.Largest.IsExclusiveSentinel()) {
+				panic("ingest-time split produced a file that overlaps with ingested file")
+			}
+		}
+		updateMetrics(splitFile, s.level, added)
+	}
+	return nil
+}
 
 func (d *DB) ingestApply(
 	jobID int,
@@ -1811,7 +1927,7 @@ func (d *DB) ingestApply(
 	ve := &versionEdit{
 		NewFiles: make([]newFileEntry, lr.fileCount),
 	}
-	if exciseSpan.Valid() {
+	if exciseSpan.Valid() || (d.opts.Experimental.IngestSplit != nil && d.opts.Experimental.IngestSplit()) {
 		ve.DeletedFiles = map[manifest.DeletedFileEntry]*manifest.FileMetadata{}
 	}
 	metrics := make(map[int]*LevelMetrics)
@@ -1839,6 +1955,12 @@ func (d *DB) ingestApply(
 	current := d.mu.versions.currentVersion()
 	baseLevel := d.mu.versions.picker.getBaseLevel()
 	iterOps := IterOptions{logger: d.opts.Logger}
+	// filesToSplit is a list where each element is a pair consisting of a file
+	// being ingested and a file being split to make room for an ingestion into
+	// that level. Each ingested file will appear at most once in this list. It
+	// is possible for split files to appear twice in this list.
+	filesToSplit := make([]ingestSplitFile, 0)
+	checkCompactions := false
 	for i := 0; i < lr.fileCount; i++ {
 		// Determine the lowest level in the LSM for which the sstable doesn't
 		// overlap any existing files in the level.
@@ -1871,6 +1993,7 @@ func (d *DB) ingestApply(
 			if externalFile {
 				ve.CreatedBackingTables = append(ve.CreatedBackingTables, m.FileBacking)
 			}
+			var splitFile *fileMetadata
 			if exciseSpan.Valid() && exciseSpan.Contains(d.cmp, m.Smallest) && exciseSpan.Contains(d.cmp, m.Largest) {
 				// This file fits perfectly within the excise span. We can slot it at
 				// L6, or sharedLevelsStart - 1 if we have shared files.
@@ -1883,7 +2006,33 @@ func (d *DB) ingestApply(
 					f.Level = 6
 				}
 			} else {
-				f.Level, err = findTargetLevel(d.newIters, d.tableNewRangeKeyIter, iterOps, d.cmp, current, baseLevel, d.mu.compact.inProgress, m)
+				// TODO(bilal): findTargetLevel does disk IO (reading files for data
+				// overlap) even though we're holding onto d.mu. Consider unlocking
+				// d.mu while we do this. We already hold versions.logLock so we should
+				// not see any version applications while we're at this. The one
+				// complication here would be pulling out the mu.compact.inProgress
+				// check from findTargetLevel, as that requires d.mu to be held.
+				shouldIngestSplit := d.opts.Experimental.IngestSplit != nil &&
+					d.opts.Experimental.IngestSplit() && d.FormatMajorVersion() >= ExperimentalFormatVirtualSSTables
+				f.Level, splitFile, err = findTargetLevel(
+					d.newIters, d.tableNewRangeKeyIter, iterOps, d.cmp, current, baseLevel, d.mu.compact.inProgress, m, shouldIngestSplit)
+			}
+
+			if splitFile != nil {
+				if invariants.Enabled {
+					if lf := current.Levels[f.Level].Find(d.cmp, splitFile); lf == nil {
+						panic("splitFile returned is not in level it should be")
+					}
+				}
+				// We take advantage of the fact that we won't drop the db mutex
+				// between now and the call to logAndApply. So, no files should
+				// get added to a new in-progress compaction at this point. We can
+				// avoid having to iterate on in-progress compactions to cancel them
+				// if none of the files being split have a compacting state.
+				if splitFile.IsCompacting() {
+					checkCompactions = true
+				}
+				filesToSplit = append(filesToSplit, ingestSplitFile{ingestFile: m, splitFile: splitFile, level: f.Level})
 			}
 		}
 		if err != nil {
@@ -1901,6 +2050,26 @@ func (d *DB) ingestApply(
 		levelMetrics.BytesIngested += m.Size
 		levelMetrics.TablesIngested++
 	}
+	// replacedFiles maps files excised due to exciseSpan (or splitFiles returned
+	// by ingestTargetLevel), to files that were created to replace it. This map
+	// is used to resolve references to split files in filesToSplit, as it is
+	// possible for a file that we want to split to no longer exist or have a
+	// newer fileMetadata due to a split induced by another ingestion file, or an
+	// excise.
+	replacedFiles := make(map[base.FileNum][]newFileEntry)
+	updateLevelMetricsOnExcise := func(m *fileMetadata, level int, added []newFileEntry) {
+		levelMetrics := metrics[level]
+		if levelMetrics == nil {
+			levelMetrics = &LevelMetrics{}
+			metrics[level] = levelMetrics
+		}
+		levelMetrics.NumFiles--
+		levelMetrics.Size -= int64(m.Size)
+		for i := range added {
+			levelMetrics.NumFiles++
+			levelMetrics.Size += int64(added[i].Meta.Size)
+		}
+	}
 	if exciseSpan.Valid() {
 		// Iterate through all levels and find files that intersect with exciseSpan.
 		//
@@ -1934,20 +2103,18 @@ func (d *DB) ingestApply(
 					// We did not excise this file.
 					continue
 				}
-				levelMetrics := metrics[level]
-				if levelMetrics == nil {
-					levelMetrics = &LevelMetrics{}
-					metrics[level] = levelMetrics
-				}
-				levelMetrics.NumFiles--
-				levelMetrics.Size -= int64(m.Size)
-				for i := range excised {
-					levelMetrics.NumFiles++
-					levelMetrics.Size += int64(excised[i].Meta.Size)
-				}
+				replacedFiles[m.FileNum] = excised
+				updateLevelMetricsOnExcise(m, level, excised)
 			}
 		}
 	}
+	if len(filesToSplit) > 0 {
+		// For the same reasons as the above call to excise, we hold the db mutex
+		// while calling this method.
+		if err := d.ingestSplit(ve, updateLevelMetricsOnExcise, filesToSplit, exciseSpan, replacedFiles); err != nil {
+			return nil, err
+		}
+	}
 	for c := range d.mu.compact.inProgress {
 		if c.versionEditApplied {
 			continue
@@ -1959,8 +2126,22 @@ func (d *DB) ingestApply(
 		// doing a [c,d) excise at the same time as this compaction, we will have
 		// to error out the whole compaction as we can't guarantee it hasn't/won't
 		// write a file overlapping with the excise span.
-		if exciseSpan.OverlapsInternalKeyRange(d.cmp, c.smallest, c.largest) {
+		if exciseSpan.Valid() && exciseSpan.OverlapsInternalKeyRange(d.cmp, c.smallest, c.largest) {
 			c.cancel.Store(true)
+			continue
+		}
+		// Check if this compaction's inputs have been replaced due to an ingest-time
+		// split. In that case, cancel the compaction.
+		if checkCompactions {
+			for i := range c.inputs {
+				iter := c.inputs[i].files.Iter()
+				for f := iter.First(); f != nil; f = iter.Next() {
+					if _, ok := replacedFiles[f.FileNum]; ok {
+						c.cancel.Store(true)
+						break
+					}
+				}
+			}
 		}
 	}
 	if err := d.mu.versions.logAndApply(jobID, ve, metrics, false /* forceRotation */, func() []compactionInfo {
diff --git a/ingest_test.go b/ingest_test.go
index 79379ad995..bd25ca754a 100644
--- a/ingest_test.go
+++ b/ingest_test.go
@@ -1490,16 +1490,28 @@ func TestIngestTargetLevel(t *testing.T) {
 
 		case "target":
 			var buf bytes.Buffer
+			suggestSplit := false
+			for _, cmd := range td.CmdArgs {
+				switch cmd.Key {
+				case "suggest-split":
+					suggestSplit = true
+				}
+			}
 			for _, target := range strings.Split(td.Input, "\n") {
 				meta := parseMeta(target)
-				level, err := ingestTargetLevel(
+				level, overlapFile, err := ingestTargetLevel(
 					d.newIters, d.tableNewRangeKeyIter, IterOptions{logger: d.opts.Logger},
 					d.cmp, d.mu.versions.currentVersion(), 1, d.mu.compact.inProgress, meta,
+					suggestSplit,
 				)
 				if err != nil {
 					return err.Error()
 				}
-				fmt.Fprintf(&buf, "%d\n", level)
+				if overlapFile != nil {
+					fmt.Fprintf(&buf, "%d (split file: %s)\n", level, overlapFile.FileNum)
+				} else {
+					fmt.Fprintf(&buf, "%d\n", level)
+				}
 			}
 			return buf.String()
 
@@ -1517,7 +1529,7 @@ func TestIngest(t *testing.T) {
 		require.NoError(t, d.Close())
 	}()
 
-	reset := func() {
+	reset := func(split bool) {
 		if d != nil {
 			require.NoError(t, d.Close())
 		}
@@ -1534,6 +1546,9 @@ func TestIngest(t *testing.T) {
 			}},
 			FormatMajorVersion: internalFormatNewest,
 		}
+		opts.Experimental.IngestSplit = func() bool {
+			return split
+		}
 		// Disable automatic compactions because otherwise we'll race with
 		// delete-only compactions triggered by ingesting range tombstones.
 		opts.DisableAutomaticCompactions = true
@@ -1542,12 +1557,21 @@ func TestIngest(t *testing.T) {
 		d, err = Open("", opts)
 		require.NoError(t, err)
 	}
-	reset()
+	reset(false /* split */)
 
 	datadriven.RunTest(t, "testdata/ingest", func(t *testing.T, td *datadriven.TestData) string {
 		switch td.Cmd {
 		case "reset":
-			reset()
+			split := false
+			for _, cmd := range td.CmdArgs {
+				switch cmd.Key {
+				case "enable-split":
+					split = true
+				default:
+					return fmt.Sprintf("unexpected key: %s", cmd.Key)
+				}
+			}
+			reset(split)
 			return ""
 		case "batch":
 			b := d.NewIndexedBatch()
diff --git a/options.go b/options.go
index d009ab13e1..57636049ab 100644
--- a/options.go
+++ b/options.go
@@ -540,6 +540,11 @@ type Options struct {
 		// concurrency slots as determined by the two options is chosen.
 		CompactionDebtConcurrency int
 
+		// IngestSplit, if it returns true, allows for ingest-time splitting of
+		// existing sstables into two virtual sstables to allow ingestion sstables to
+		// slot into a lower level than they otherwise would have.
+		IngestSplit func() bool
+
 		// ReadCompactionRate controls the frequency of read triggered
 		// compactions by adjusting `AllowedSeeks` in manifest.FileMetadata:
 		//
diff --git a/testdata/ingest b/testdata/ingest
index 62dcd7e654..cb142368e5 100644
--- a/testdata/ingest
+++ b/testdata/ingest
@@ -906,3 +906,137 @@ lsm
   000006:[f#12,SET-h#12,SET]
   000010:[s#16,RANGEKEYDEL-x#inf,RANGEKEYDEL]
   000009:[x#15,SET-y#15,SET]
+
+reset enable-split
+----
+
+build ext10
+set a foo
+set e bar
+----
+
+ingest ext10
+----
+
+lsm
+----
+6:
+  000004:[a#10,SET-e#10,SET]
+
+# The below ingestion should split one existing file.
+
+build ext11
+set b foobar
+set d foobar
+----
+
+ingest ext11
+----
+
+lsm
+----
+6:
+  000006:[a#10,SET-a#10,SET]
+  000005:[b#11,SET-d#11,SET]
+  000007:[e#10,SET-e#10,SET]
+
+iter
+first
+next
+next
+next
+----
+a: (foo, .)
+b: (foobar, .)
+d: (foobar, .)
+e: (bar, .)
+
+# This ingestion should not split any files due to data overlap.
+
+build ext12
+set c foobar
+set e baz
+----
+
+ingest ext12
+----
+
+lsm
+----
+0.0:
+  000008:[c#12,SET-e#12,SET]
+6:
+  000006:[a#10,SET-a#10,SET]
+  000005:[b#11,SET-d#11,SET]
+  000007:[e#10,SET-e#10,SET]
+
+# The below ingestion should fall through one existing file and split another
+# file.
+
+build ext13
+set cc foo
+set ccc foooo
+----
+
+ingest ext13
+----
+
+lsm
+----
+0.0:
+  000008:[c#12,SET-e#12,SET]
+6:
+  000006:[a#10,SET-a#10,SET]
+  000010:[b#11,SET-b#11,SET]
+  000009:[cc#13,SET-ccc#13,SET]
+  000011:[d#11,SET-d#11,SET]
+  000007:[e#10,SET-e#10,SET]
+
+iter
+seek-ge c
+next
+next
+next
+next
+----
+c: (foobar, .)
+cc: (foo, .)
+ccc: (foooo, .)
+d: (foobar, .)
+e: (baz, .)
+
+# Ingestion splitting doesn't kick in at L0.
+
+build ext14
+set d updated
+set dd new
+----
+
+ingest ext14
+----
+
+lsm
+----
+0.1:
+  000012:[d#14,SET-dd#14,SET]
+0.0:
+  000008:[c#12,SET-e#12,SET]
+6:
+  000006:[a#10,SET-a#10,SET]
+  000010:[b#11,SET-b#11,SET]
+  000009:[cc#13,SET-ccc#13,SET]
+  000011:[d#11,SET-d#11,SET]
+  000007:[e#10,SET-e#10,SET]
+
+iter
+seek-lt d
+next
+next
+next
+next
+----
+ccc: (foooo, .)
+d: (updated, .)
+dd: (new, .)
+e: (baz, .)
+.
diff --git a/testdata/ingest_target_level b/testdata/ingest_target_level
index 66d8d65924..e6b8edf1a3 100644
--- a/testdata/ingest_target_level
+++ b/testdata/ingest_target_level
@@ -246,3 +246,79 @@ target
 rkey:a-c
 ----
 4
+
+# Cases with boundary overlap and no data overlap. With suggest-split off
+# we get a target level of L0, but with suggest-split on, we get suggested
+# a file split.
+
+define
+L6
+  a.SET.2:2
+  d.SET.3:3
+L6
+  f.SET.4:4
+  k.SET.6:6
+----
+6:
+  000004:[a#2,SET-d#3,SET]
+  000005:[f#4,SET-k#6,SET]
+
+target
+b-c
+e-g
+----
+5
+5
+
+target suggest-split
+b-c
+e-g
+----
+6 (split file: 000004)
+5
+
+target suggest-split
+g-i
+----
+6 (split file: 000005)
+
+# suggest-split recognizes and avoids in-progress compactions.
+
+define
+L6
+  a.SET.2:2
+  d.SET.3:3
+L6
+  f.SET.4:4
+  k.SET.6:6
+  compact:f-k
+----
+6:
+  000004:[a#2,SET-d#3,SET]
+  000005:[f#4,SET-k#6,SET]
+
+target suggest-split
+g-i
+----
+5
+
+# Ingestion splitting correctly recognizes data overlap in L6, and suggests
+# split in L5.
+
+define
+L5
+  a.SET.2:2
+  e.SET.3:3
+L6
+  c.SET.1:1
+  k.SET.1:1
+----
+5:
+  000004:[a#2,SET-e#3,SET]
+6:
+  000005:[c#1,SET-k#1,SET]
+
+target suggest-split
+b-c
+----
+5 (split file: 000004)