importccl: Direct-ingest uses two bulk adders instead of one.
This is another change to stabilize direct ingest import before
it is made the default.
As a consequence of cockroachdb#39271, the number of files (L0 and
total) and the cumulative compaction size increased drastically.
This is because we no longer create buckets of TableIDIndexID
before flushing, so the single bulk adder would receive a mix of
primary and secondary index entries. Since SSTs cannot span the
splits we insert between index spans, the adder would create
numerous small secondary index SSTs alongside the bigger primary
index SSTs, and would flush on reaching its buffer limit (which
happened often).

By introducing two adders, one for ingesting primary index data
and the other for ingesting secondary index data, we regain the
ability to make fewer, bigger secondary index SSTs and to flush
less often. Peak memory usage is lower than what prebuffering
used to hit, while the number of files (L0 and total) and the
cumulative compaction size return to prebuffering levels.
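
To make the mechanism concrete, the routing rule at the heart of
this change boils down to the following (a minimal standalone Go
sketch, not the actual importccl code; `adder` here is a
hypothetical stand-in for storagebase.BulkAdder, and index ID 1
denotes the table's primary index, as in the diff below):

package main

import "fmt"

// adder is a hypothetical stand-in for storagebase.BulkAdder,
// which buffers KVs and flushes them out as SSTs on reaching its
// buffer limit.
type adder struct {
	name string
	kvs  int
}

func (a *adder) add() { a.kvs++ }

func main() {
	pkIndexAdder := &adder{name: "pkIndexAdder"}
	indexAdder := &adder{name: "indexAdder"}

	// In the real code the index ID is extracted from each key via
	// sqlbase.DecodeTableIDIndexID(kv.Key).
	for _, indexID := range []uint32{1, 1, 2, 1, 3} {
		if indexID == 1 {
			pkIndexAdder.add() // primary index KVs
		} else {
			indexAdder.add() // secondary index KVs
		}
	}

	fmt.Println(pkIndexAdder.name, pkIndexAdder.kvs) // pkIndexAdder 3
	fmt.Println(indexAdder.name, indexAdder.kvs)     // indexAdder 2
}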

Some stats below for a tpcc 1k import on a single-node cluster.

With prebuffering:
Total files:            7670
L0 files:               1848
Cumulative compaction:  24.54 GiB

Without prebuffering, one adder:
Total files:            22420
L0 files:               16900
Cumulative compaction:  52.43 GiB

Without prebuffering, two adders:
Total files:            6805
L0 files:               1078
Cumulative compaction:  18.89 GiB

Release note: None
adityamaru27 committed Aug 8, 2019
1 parent a78892f commit e0d214f
Showing 3 changed files with 72 additions and 20 deletions.
78 changes: 59 additions & 19 deletions pkg/ccl/importccl/read_import_proc.go
@@ -377,11 +377,18 @@ func (cp *readImportDataProcessor) doRun(ctx context.Context) error {
 
 	writeTS := hlc.Timestamp{WallTime: cp.spec.WalltimeNanos}
 	const bufferSize, flushSize = 64 << 20, 16 << 20
-	adder, err := cp.flowCtx.Cfg.BulkAdder(ctx, cp.flowCtx.Cfg.DB, bufferSize, flushSize, writeTS)
+
+	// We create two bulk adders so as to combat the excessive flushing of
+	// small SSTs which was observed when using a single adder for both
+	// primary and secondary index kvs. The number of secondary index kvs is
+	// small, and so we expect the indexAdder to flush much less frequently
+	// than the pkIndexAdder.
+	pkIndexAdder, err := cp.flowCtx.Cfg.BulkAdder(ctx, cp.flowCtx.Cfg.DB, bufferSize, flushSize, writeTS)
 	if err != nil {
 		return err
 	}
-	adder.SetDisallowShadowing(true)
+	pkIndexAdder.SetName("pkIndexAdder")
+	pkIndexAdder.SetDisallowShadowing(true)
 	// AddSSTable with disallowShadowing=true does not consider a KV with the
 	// same ts and value to be a collision. This is to support the resumption
 	// of IMPORT jobs which might re-import some already ingested, but not
@@ -390,16 +397,26 @@ func (cp *readImportDataProcessor) doRun(ctx context.Context) error {
 	// To provide a similar behavior with KVs within the same SST, we silently
 	// skip over duplicates with the same value, instead of throwing a
 	// uniqueness error.
-	adder.SkipLocalDuplicatesWithSameValues(true)
-	defer adder.Close(ctx)
+	pkIndexAdder.SkipLocalDuplicatesWithSameValues(true)
+	defer pkIndexAdder.Close(ctx)
+
+	indexAdder, err := cp.flowCtx.Cfg.BulkAdder(ctx, cp.flowCtx.Cfg.DB, bufferSize, flushSize, writeTS)
+	if err != nil {
+		return err
+	}
+	indexAdder.SetName("indexAdder")
+	indexAdder.SetDisallowShadowing(true)
+	indexAdder.SkipLocalDuplicatesWithSameValues(true)
+	defer indexAdder.Close(ctx)
 
 	// Drain the kvCh using the BulkAdder until it closes.
-	if err := ingestKvs(ctx, adder, kvCh); err != nil {
+	if err := ingestKvs(ctx, pkIndexAdder, indexAdder, kvCh); err != nil {
 		return err
 	}
 
-	added := adder.GetSummary()
-	countsBytes, err := protoutil.Marshal(&added)
+	addedSummary := pkIndexAdder.GetSummary()
+	addedSummary.Add(indexAdder.GetSummary())
+	countsBytes, err := protoutil.Marshal(&addedSummary)
 	if err != nil {
 		return err
 	}
@@ -521,7 +538,10 @@ func wrapRowErr(err error, file string, row int64, code, format string, args ...
 // ingestKvs drains kvs from the channel until it closes, ingesting them using
 // the BulkAdder. It handles the required buffering/sorting/etc.
 func ingestKvs(
-	ctx context.Context, adder storagebase.BulkAdder, kvCh <-chan []roachpb.KeyValue,
+	ctx context.Context,
+	pkIndexAdder storagebase.BulkAdder,
+	indexAdder storagebase.BulkAdder,
+	kvCh <-chan []roachpb.KeyValue,
 ) error {
 	// We insert splits at every index span of the table prior to the invocation
 	// of this method. Since the BulkAdder is split aware when constructing SSTs,
@@ -544,24 +564,44 @@ func ingestKvs(
 	// mentioned above, the KVs sent to the BulkAdder are no longer grouped which
 	// results in flushing a much larger number of small SSTs. This increases the
 	// number of L0 (and total) files, but with a lower memory usage.
-	//
-	// TODO(adityamaru): Once the roachtest import/experimental-direct-ingestion
-	// stabilizes, we can explore a two adder approach where we send primary
-	// indexes to one, and secondary indexes to the other. This should reduce the
-	// number of L0 (and total) files to be comparable to the previous
-	// implementation with pre-buffering.
 	for kvBatch := range kvCh {
 		for _, kv := range kvBatch {
-			if err := adder.Add(ctx, kv.Key, kv.Value.RawBytes); err != nil {
-				if _, ok := err.(storagebase.DuplicateKeyError); ok {
-					return errors.WithStack(err)
-				}
-				return err
-			}
+			_, _, indexID, indexErr := sqlbase.DecodeTableIDIndexID(kv.Key)
+			if indexErr != nil {
+				return indexErr
+			}
+
+			// Decide which adder to send the KV to by extracting its index id.
+			//
+			// TODO(adityamaru): There is a potential optimization of plumbing the
+			// different putters through and differentiating based on their type.
+			// It might be more efficient than parsing every kv.
+			if indexID == 1 {
+				if err := pkIndexAdder.Add(ctx, kv.Key, kv.Value.RawBytes); err != nil {
+					if _, ok := err.(storagebase.DuplicateKeyError); ok {
+						return errors.WithStack(err)
+					}
+					return err
+				}
+			} else {
+				if err := indexAdder.Add(ctx, kv.Key, kv.Value.RawBytes); err != nil {
+					if _, ok := err.(storagebase.DuplicateKeyError); ok {
+						return errors.WithStack(err)
+					}
+					return err
+				}
+			}
 		}
 	}
 
-	if err := adder.Flush(ctx); err != nil {
+	if err := pkIndexAdder.Flush(ctx); err != nil {
+		if err, ok := err.(storagebase.DuplicateKeyError); ok {
+			return errors.WithStack(err)
+		}
+		return err
+	}
+
+	if err := indexAdder.Flush(ctx); err != nil {
 		if err, ok := err.(storagebase.DuplicateKeyError); ok {
 			return errors.WithStack(err)
 		}
12 changes: 11 additions & 1 deletion pkg/storage/bulk/buffering_adder.go
@@ -40,6 +40,9 @@ type BufferingAdder struct {
 		total      int
 		bufferSize int
 	}
+
+	// name of the BufferingAdder for the purpose of logging only.
+	name string
 }
 
 // MakeBulkAdder makes a storagebase.BulkAdder that buffers and sorts K/Vs passed
@@ -72,10 +75,17 @@ func (b *BufferingAdder) SkipLocalDuplicatesWithSameValues(skip bool) {
 	b.sink.skipDuplicateKeysWithSameValue = skip
 }
 
+// SetName sets the name of the adder being used for the purpose of logging
+// stats.
+func (b *BufferingAdder) SetName(name string) {
+	b.name = name
+}
+
 // Close closes the underlying SST builder.
 func (b *BufferingAdder) Close(ctx context.Context) {
 	log.VEventf(ctx, 2,
-		"bulk adder ingested %s, flushed %d times, %d due to buffer size. Flushed %d files, %d due to ranges, %d due to sst size",
+		"bulk adder %s ingested %s, flushed %d times, %d due to buffer size. Flushed %d files, %d due to ranges, %d due to sst size",
+		b.name,
 		sz(b.sink.totalRows.DataSize),
 		b.flushCounts.total, b.flushCounts.bufferSize,
 		b.sink.flushCounts.total, b.sink.flushCounts.split, b.sink.flushCounts.sstSize,
2 changes: 2 additions & 0 deletions pkg/storage/storagebase/bulk_adder.go
@@ -46,6 +46,8 @@ type BulkAdder interface {
 	// SetDisallowShadowing sets the flag which controls whether shadowing of
 	// existing keys is permitted in the AddSSTable method.
 	SetDisallowShadowing(bool)
+	// SetName sets the name of the adder for the purpose of logging adder stats.
+	SetName(string)
 }
 
 // DuplicateKeyError represents a failed attempt to ingest the same key twice