From bd90489d90cfe7f9fef0eb2f2c4a635d15c23d1e Mon Sep 17 00:00:00 2001
From: Spas Bojanov
Date: Thu, 30 Jan 2020 13:10:11 -0500
Subject: [PATCH 1/4] jobs: always set start time of a job

When starting a job via CreateAndStartJob, if marking the job as started
fails, the job will still be in system.jobs and can be adopted, but the
started time will be 0. In that case mark it with the current time.

Release note: none.
---
 pkg/jobs/jobs.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pkg/jobs/jobs.go b/pkg/jobs/jobs.go
index 0d6ea0ddd6cf..80b6be500b2b 100644
--- a/pkg/jobs/jobs.go
+++ b/pkg/jobs/jobs.go
@@ -575,6 +575,9 @@ func (j *Job) adopt(ctx context.Context, oldLease *jobspb.Lease) error {
             md.Payload.Lease, oldLease)
         }
         md.Payload.Lease = j.registry.newLease()
+        if md.Payload.StartedMicros == 0 {
+            md.Payload.StartedMicros = timeutil.ToUnixMicros(j.registry.clock.Now().GoTime())
+        }
         ju.UpdatePayload(md.Payload)
         // Jobs in states running or pending are adopted as running.
         newStatus := StatusRunning

From cddc665c86b84d0caa0e8620616bd3852a1e2018 Mon Sep 17 00:00:00 2001
From: Andrew Werner
Date: Thu, 30 Jan 2020 11:24:22 -0500
Subject: [PATCH 2/4] engine: alter the meaning of targetSize in ExportToSst

In #44440 we added a `targetSize` parameter to enable pagination of export
requests. In that PR we defined the semantics such that the export returns
just before adding the key that would lead to the `targetSize` being
exceeded. This definition is unfortunate when thinking about a total size
limit for pagination in the DistSender (which we'll add when #44341 comes
in).

Imagine a case where we set a total byte limit of 1MB and a file byte limit
of 1MB. That setting should lead to at most a single file being emitted
(assuming one range holds enough data). If we used the previous definition
we'd create a file which is just below 1MB and then the DistSender would
need to send another request which would contain a tiny amount of data.

This brings the behavior in line with the semantics introduced in #44341
for ScanRequests and is just easier to reason about.

Release note: None
---
 c-deps/libroach/db.cc              |  6 +++---
 c-deps/libroach/include/libroach.h | 10 +++-------
 pkg/ccl/storageccl/export_test.go  | 19 ++++++++++++++++++-
 pkg/storage/engine/engine.go       | 12 ++++--------
 pkg/storage/engine/pebble.go       |  6 +++---
 5 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/c-deps/libroach/db.cc b/c-deps/libroach/db.cc
index 5c83ce9904b3..28ebc9597c30 100644
--- a/c-deps/libroach/db.cc
+++ b/c-deps/libroach/db.cc
@@ -1136,9 +1136,8 @@ DBStatus DBExportToSst(DBKey start, DBKey end, bool export_all_revisions, uint64
     // Check to see if this is the first version of key and adding it would
     // put us over the limit (we might already be over the limit).
     const int64_t cur_size = bulkop_summary.data_size();
-    const int64_t new_size = cur_size + decoded_key.size() + iter.value().size();
-    const bool is_over_target = cur_size > 0 && new_size > target_size;
-    if (paginated && is_new_key && is_over_target) {
+    const bool reached_target_size = cur_size > 0 && cur_size >= target_size;
+    if (paginated && is_new_key && reached_target_size) {
       resume_key.reserve(decoded_key.size());
       resume_key.assign(decoded_key.data(), decoded_key.size());
       break;
@@ -1154,6 +1153,7 @@ DBStatus DBExportToSst(DBKey start, DBKey end, bool export_all_revisions, uint64
     if (!row_counter.Count((iter.key()), &bulkop_summary)) {
       return ToDBString("Error in row counter");
     }
+    const int64_t new_size = cur_size + decoded_key.size() + iter.value().size();
     bulkop_summary.set_data_size(new_size);
   }
   *summary = ToDBString(bulkop_summary.SerializeAsString());
diff --git a/c-deps/libroach/include/libroach.h b/c-deps/libroach/include/libroach.h
index ad916cac3893..2b7d8fb650ea 100644
--- a/c-deps/libroach/include/libroach.h
+++ b/c-deps/libroach/include/libroach.h
@@ -556,13 +556,9 @@ DBStatus DBUnlockFile(DBFileLock lock);
 // DBExportToSst exports changes over the keyrange and time interval between the
 // start and end DBKeys to an SSTable using an IncrementalIterator.
 // If target_size is positive, it indicates that the export should produce SSTs
-// which are roughly target size. Specifically, it will produce SSTs which contain
-// all relevant versions of a key and will not add the first version of a new
-// key if it would lead to the SST exceeding the target_size. If export_all_revisions
-// is false, the returned SST will be smaller than target_size so long as the first
-// kv pair is smaller than target_size. If export_all_revisions is true then
-// target_size may be exceeded. If the SST construction stops due to the target_size,
-// then resume will be set to the value of the resume key.
+// which are roughly target size. Specifically, it will return an SST such that
+// the last key is responsible for exceeding the targetSize. If the resume_key
+// is non-NULL then the returned sst will exceed the targetSize.
 DBStatus DBExportToSst(DBKey start, DBKey end, bool export_all_revisions, uint64_t target_size,
                        DBIterOptions iter_opts, DBEngine* engine, DBString* data,
                        DBString* write_intent, DBString* summary, DBString* resume);
diff --git a/pkg/ccl/storageccl/export_test.go b/pkg/ccl/storageccl/export_test.go
index 56728d5fecb7..99f975335df7 100644
--- a/pkg/ccl/storageccl/export_test.go
+++ b/pkg/ccl/storageccl/export_test.go
@@ -333,9 +333,26 @@ func assertEqualKVs(
     var kvs []engine.MVCCKeyValue
     for start := startKey; start != nil; {
         var sst []byte
-        sst, _, start, err = e.ExportToSst(start, endKey, startTime, endTime, exportAllRevisions, targetSize, io)
+        var summary roachpb.BulkOpSummary
+        sst, summary, start, err = e.ExportToSst(start, endKey, startTime, endTime, exportAllRevisions, targetSize, io)
         require.NoError(t, err)
         loaded := loadSST(t, sst, startKey, endKey)
+        // Ensure that the pagination worked properly.
+        if start != nil {
+            dataSize := uint64(summary.DataSize)
+            require.Truef(t, targetSize <= dataSize, "%d > %d",
+                targetSize, summary.DataSize)
+            // Now we want to ensure that if we remove the bytes due to the last
+            // key that we are below the target size.
+            firstKVofLastKey := sort.Search(len(loaded), func(i int) bool {
+                return loaded[i].Key.Key.Equal(loaded[len(loaded)-1].Key.Key)
+            })
+            dataSizeWithoutLastKey := dataSize
+            for _, kv := range loaded[firstKVofLastKey:] {
+                dataSizeWithoutLastKey -= uint64(kv.Key.Len() + len(kv.Value))
+            }
+            require.Truef(t, targetSize > dataSizeWithoutLastKey, "%d <= %d", targetSize, dataSizeWithoutLastKey)
+        }
         kvs = append(kvs, loaded...)
     }
diff --git a/pkg/storage/engine/engine.go b/pkg/storage/engine/engine.go
index d2163fb1c1c8..207a6035557b 100644
--- a/pkg/storage/engine/engine.go
+++ b/pkg/storage/engine/engine.go
@@ -201,14 +201,10 @@ type Reader interface {
     // requested or if the start.Timestamp is non-zero. Returns the bytes of an
     // SSTable containing the exported keys, the size of exported data, or an error.
     // If targetSize is positive, it indicates that the export should produce SSTs
-    // which are roughly target size. Specifically, it will produce SSTs which contain
-    // all relevant versions of a key and will not add the first version of a new
-    // key if it would lead to the SST exceeding the targetSize. If exportAllRevisions
-    // is false, the returned SST will be smaller than target_size so long as the first
-    // kv pair is smaller than targetSize. If exportAllRevisions is true then
-    // targetSize may be exceeded by as much as the size of all of the versions of
-    // the last key. If the SST construction stops due to the targetSize,
-    // then a non-nil resumeKey will be returned.
+    // which are roughly target size. Specifically, it will return an SST such that
+    // the last key is responsible for meeting or exceeding the targetSize. If the
+    // resumeKey is non-nil then the data size of the returned sst will be greater
+    // than or equal to the targetSize.
     ExportToSst(
         startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
         exportAllRevisions bool, targetSize uint64, io IterOptions,
diff --git a/pkg/storage/engine/pebble.go b/pkg/storage/engine/pebble.go
index d573cd462af1..2786fc391af9 100644
--- a/pkg/storage/engine/pebble.go
+++ b/pkg/storage/engine/pebble.go
@@ -1163,9 +1163,8 @@ func pebbleExportToSst(
             return nil, roachpb.BulkOpSummary{}, nil, errors.Wrapf(err, "decoding %s", unsafeKey)
         }
         curSize := rows.BulkOpSummary.DataSize
-        newSize := curSize + int64(len(unsafeKey.Key)+len(unsafeValue))
-        isOverTarget := paginated && curSize > 0 && uint64(newSize) > targetSize
-        if isNewKey && isOverTarget {
+        reachedTargetSize := curSize > 0 && uint64(curSize) >= targetSize
+        if paginated && isNewKey && reachedTargetSize {
             // Allocate the right size for resumeKey rather than using curKey.
             resumeKey = append(make(roachpb.Key, 0, len(unsafeKey.Key)), unsafeKey.Key...)
             break
@@ -1173,6 +1172,7 @@ func pebbleExportToSst(
         if err := sstWriter.Put(unsafeKey, unsafeValue); err != nil {
             return nil, roachpb.BulkOpSummary{}, nil, errors.Wrapf(err, "adding key %s", unsafeKey)
         }
+        newSize := curSize + int64(len(unsafeKey.Key)+len(unsafeValue))
         rows.BulkOpSummary.DataSize = newSize
     }

From 8429a42c5053064aed682ae2321865d5f2c6c968 Mon Sep 17 00:00:00 2001
From: Andrew Werner
Date: Thu, 30 Jan 2020 14:02:44 -0500
Subject: [PATCH 3/4] engine: add a maxSize parameter to ExportToSst

It would be a major change to allow pagination of exported SSTs within
versions of an individual key. Given that the previous changes always
include all versions of a key, there's a concern that keys with very large
numbers of versions could create SSTs which could not be restored or which
might OOM a server.
We'd rather fail to create a backup than OOM or create an unusable backup.

To deal with this, this commit adds a new maxSize parameter above which the
ExportToSst call will fail. In a follow-up commit there will be a cluster
setting to configure this value; for now it is set to unlimited. If
customers hit this error when creating a backup, they'll need to either set
a lower GC TTL and run GC, or use a point-in-time backup rather than a
backup which contains all of the versions.

The export tests were extended to ensure that this parameter behaves as
expected and were stressed on the teeing engine to ensure that the behavior
matches between pebble and rocksdb.

Relates to #43356

CC @dt

Release note: None
---
 c-deps/libroach/db.cc              |  6 +++++-
 c-deps/libroach/include/libroach.h |  9 ++++++++-
 pkg/ccl/storageccl/export.go       |  4 +++-
 pkg/ccl/storageccl/export_test.go  | 25 +++++++++++++++++++++++--
 pkg/storage/engine/engine.go       |  9 ++++++++-
 pkg/storage/engine/pebble.go       | 18 +++++++++++-------
 pkg/storage/engine/pebble_batch.go |  2 +-
 pkg/storage/engine/rocksdb.go      | 18 ++++++++++--------
 pkg/storage/engine/tee.go          | 18 +++++++++---------
 pkg/storage/spanset/batch.go       |  4 ++--
 10 files changed, 80 insertions(+), 33 deletions(-)

diff --git a/c-deps/libroach/db.cc b/c-deps/libroach/db.cc
index 28ebc9597c30..d4901b54108a 100644
--- a/c-deps/libroach/db.cc
+++ b/c-deps/libroach/db.cc
@@ -1075,7 +1075,8 @@ DBStatus DBUnlockFile(DBFileLock lock) {
   return ToDBStatus(rocksdb::Env::Default()->UnlockFile((rocksdb::FileLock*)lock));
 }
 
-DBStatus DBExportToSst(DBKey start, DBKey end, bool export_all_revisions, uint64_t target_size,
+DBStatus DBExportToSst(DBKey start, DBKey end, bool export_all_revisions,
+                       uint64_t target_size, uint64_t max_size,
                        DBIterOptions iter_opts, DBEngine* engine, DBString* data,
                        DBString* write_intent, DBString* summary, DBString* resume) {
   DBSstFileWriter* writer = DBSstFileWriterNew();
@@ -1154,6 +1155,9 @@ DBStatus DBExportToSst(DBKey start, DBKey end, bool export_all_revisions, uint64
       return ToDBString("Error in row counter");
     }
     const int64_t new_size = cur_size + decoded_key.size() + iter.value().size();
+    if (max_size > 0 && new_size > max_size) {
+      return FmtStatus("export size (%ld bytes) exceeds max size (%ld bytes)", new_size, max_size);
+    }
     bulkop_summary.set_data_size(new_size);
   }
   *summary = ToDBString(bulkop_summary.SerializeAsString());
diff --git a/c-deps/libroach/include/libroach.h b/c-deps/libroach/include/libroach.h
index 2b7d8fb650ea..92dc828a776f 100644
--- a/c-deps/libroach/include/libroach.h
+++ b/c-deps/libroach/include/libroach.h
@@ -555,11 +555,18 @@ DBStatus DBUnlockFile(DBFileLock lock);
 // DBExportToSst exports changes over the keyrange and time interval between the
 // start and end DBKeys to an SSTable using an IncrementalIterator.
+//
 // If target_size is positive, it indicates that the export should produce SSTs
 // which are roughly target size. Specifically, it will return an SST such that
 // the last key is responsible for exceeding the targetSize. If the resume_key
 // is non-NULL then the returned sst will exceed the targetSize.
-DBStatus DBExportToSst(DBKey start, DBKey end, bool export_all_revisions, uint64_t target_size,
+//
+// If max_size is positive, it is an absolute maximum on byte size for the
+// returned sst. If it is the case that the versions of the last key will lead
+// to an SST that exceeds maxSize, an error will be returned. This parameter
+// exists to prevent creating SSTs which are too large to be used.
+DBStatus DBExportToSst(DBKey start, DBKey end, bool export_all_revisions,
+                       uint64_t target_size, uint64_t max_size,
                        DBIterOptions iter_opts, DBEngine* engine, DBString* data,
                        DBString* write_intent, DBString* summary, DBString* resume);
diff --git a/pkg/ccl/storageccl/export.go b/pkg/ccl/storageccl/export.go
index 9040934a0577..08d58b120173 100644
--- a/pkg/ccl/storageccl/export.go
+++ b/pkg/ccl/storageccl/export.go
@@ -135,7 +135,9 @@ func evalExport(
     // target size for files and then paginate the actual export. The external
     // API may need to be modified to deal with the case where ReturnSST is true.
     const targetSize = 0 // unlimited
-    data, summary, resume, err := e.ExportToSst(args.Key, args.EndKey, args.StartTime, h.Timestamp, exportAllRevisions, targetSize, io)
+    const maxSize = 0 // unlimited
+    data, summary, resume, err := e.ExportToSst(args.Key, args.EndKey,
+        args.StartTime, h.Timestamp, exportAllRevisions, targetSize, maxSize, io)
     if resume != nil {
         return result.Result{}, crdberrors.AssertionFailedf("expected nil resume key with unlimited target size")
     }
diff --git a/pkg/ccl/storageccl/export_test.go b/pkg/ccl/storageccl/export_test.go
index 99f975335df7..ffd1b42d1ce2 100644
--- a/pkg/ccl/storageccl/export_test.go
+++ b/pkg/ccl/storageccl/export_test.go
@@ -334,7 +334,10 @@ func assertEqualKVs(
     for start := startKey; start != nil; {
         var sst []byte
         var summary roachpb.BulkOpSummary
-        sst, summary, start, err = e.ExportToSst(start, endKey, startTime, endTime, exportAllRevisions, targetSize, io)
+        maxSize := uint64(0)
+        prevStart := start
+        sst, summary, start, err = e.ExportToSst(start, endKey, startTime, endTime,
+            exportAllRevisions, targetSize, maxSize, io)
         require.NoError(t, err)
         loaded := loadSST(t, sst, startKey, endKey)
         // Ensure that the pagination worked properly.
@@ -349,9 +352,27 @@ func assertEqualKVs(
             })
             dataSizeWithoutLastKey := dataSize
             for _, kv := range loaded[firstKVofLastKey:] {
-                dataSizeWithoutLastKey -= uint64(kv.Key.Len() + len(kv.Value))
+                dataSizeWithoutLastKey -= uint64(len(kv.Key.Key) + len(kv.Value))
             }
             require.Truef(t, targetSize > dataSizeWithoutLastKey, "%d <= %d", targetSize, dataSizeWithoutLastKey)
+            // Ensure that maxSize leads to an error if exceeded.
+            // Note that this uses a relatively nonsensical value of maxSize which
+            // is equal to the targetSize.
+            maxSize = targetSize
+            dataSizeWhenExceeded := dataSize
+            for i := len(loaded) - 1; i >= 0; i-- {
+                kv := loaded[i]
+                lessThisKey := dataSizeWhenExceeded - uint64(len(kv.Key.Key)+len(kv.Value))
+                if lessThisKey >= maxSize {
+                    dataSizeWhenExceeded = lessThisKey
+                } else {
+                    break
+                }
+            }
+            _, _, _, err = e.ExportToSst(prevStart, endKey, startTime, endTime,
+                exportAllRevisions, targetSize, maxSize, io)
+            require.Regexp(t, fmt.Sprintf("export size \\(%d bytes\\) exceeds max size \\(%d bytes\\)",
+                dataSizeWhenExceeded, maxSize), err)
         }
         kvs = append(kvs, loaded...)
     }
diff --git a/pkg/storage/engine/engine.go b/pkg/storage/engine/engine.go
index 207a6035557b..f70d48295e64 100644
--- a/pkg/storage/engine/engine.go
+++ b/pkg/storage/engine/engine.go
@@ -200,14 +200,21 @@ type Reader interface {
     // within the interval is exported. Deletions are included if all revisions are
     // requested or if the start.Timestamp is non-zero. Returns the bytes of an
     // SSTable containing the exported keys, the size of exported data, or an error.
+    //
     // If targetSize is positive, it indicates that the export should produce SSTs
     // which are roughly target size. Specifically, it will return an SST such that
     // the last key is responsible for meeting or exceeding the targetSize. If the
     // resumeKey is non-nil then the data size of the returned sst will be greater
     // than or equal to the targetSize.
+    //
+    // If maxSize is positive, it is an absolute maximum on byte size for the
+    // returned sst. If it is the case that the versions of the last key will lead
+    // to an SST that exceeds maxSize, an error will be returned. This parameter
+    // exists to prevent creating SSTs which are too large to be used.
     ExportToSst(
         startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
-        exportAllRevisions bool, targetSize uint64, io IterOptions,
+        exportAllRevisions bool, targetSize uint64, maxSize uint64,
+        io IterOptions,
     ) (sst []byte, _ roachpb.BulkOpSummary, resumeKey roachpb.Key, _ error)
     // Get returns the value for the given key, nil otherwise.
     //
diff --git a/pkg/storage/engine/pebble.go b/pkg/storage/engine/pebble.go
index 2786fc391af9..f1b628584739 100644
--- a/pkg/storage/engine/pebble.go
+++ b/pkg/storage/engine/pebble.go
@@ -492,10 +492,10 @@ func (p *Pebble) ExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
-    return pebbleExportToSst(p, startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
+    return pebbleExportToSst(p, startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, maxSize, io)
 }
 
 // Get implements the Engine interface.
@@ -939,10 +939,10 @@ func (p *pebbleReadOnly) ExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
-    return pebbleExportToSst(p, startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
+    return pebbleExportToSst(p, startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, maxSize, io)
 }
 
 func (p *pebbleReadOnly) Get(key MVCCKey) ([]byte, error) {
@@ -1061,10 +1061,10 @@ func (p *pebbleSnapshot) ExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
-    return pebbleExportToSst(p, startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
+    return pebbleExportToSst(p, startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, maxSize, io)
 }
 
 // Get implements the Reader interface.
@@ -1116,7 +1116,7 @@ func pebbleExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
     sstFile := &MemFile{}
@@ -1173,6 +1173,10 @@ func pebbleExportToSst(
             return nil, roachpb.BulkOpSummary{}, nil, errors.Wrapf(err, "adding key %s", unsafeKey)
         }
         newSize := curSize + int64(len(unsafeKey.Key)+len(unsafeValue))
+        if maxSize > 0 && newSize > int64(maxSize) {
+            return nil, roachpb.BulkOpSummary{}, nil,
+                errors.Errorf("export size (%d bytes) exceeds max size (%d bytes)", newSize, maxSize)
+        }
         rows.BulkOpSummary.DataSize = newSize
     }
diff --git a/pkg/storage/engine/pebble_batch.go b/pkg/storage/engine/pebble_batch.go
index 5fb91a80f4cc..79d1fa1704db 100644
--- a/pkg/storage/engine/pebble_batch.go
+++ b/pkg/storage/engine/pebble_batch.go
@@ -93,7 +93,7 @@ func (p *pebbleBatch) ExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
     panic("unimplemented")
diff --git a/pkg/storage/engine/rocksdb.go b/pkg/storage/engine/rocksdb.go
index 87266e0f1093..0bfe9a668f4e 100644
--- a/pkg/storage/engine/rocksdb.go
+++ b/pkg/storage/engine/rocksdb.go
@@ -780,7 +780,7 @@ func (r *RocksDB) ExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
     start := MVCCKey{Key: startKey, Timestamp: startTS}
@@ -791,8 +791,10 @@ func (r *RocksDB) ExportToSst(
     var bulkopSummary C.DBString
     var resumeKey C.DBString
 
-    err := statusToError(C.DBExportToSst(goToCKey(start), goToCKey(end), C.bool(exportAllRevisions),
-        C.uint64_t(targetSize), goToCIterOptions(io), r.rdb, &data, &intentErr, &bulkopSummary, &resumeKey))
+    err := statusToError(C.DBExportToSst(goToCKey(start), goToCKey(end),
+        C.bool(exportAllRevisions),
+        C.uint64_t(targetSize), C.uint64_t(maxSize),
+        goToCIterOptions(io), r.rdb, &data, &intentErr, &bulkopSummary, &resumeKey))
 
     if err != nil {
         if err.Error() == "WriteIntentError" {
@@ -1005,10 +1007,10 @@ func (r *rocksDBReadOnly) ExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
-    return r.parent.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
+    return r.parent.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, maxSize, io)
 }
 
 func (r *rocksDBReadOnly) Get(key MVCCKey) ([]byte, error) {
@@ -1326,10 +1328,10 @@ func (r *rocksDBSnapshot) ExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
-    return r.parent.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
+    return r.parent.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, maxSize, io)
 }
 
 // Get returns the value for the given key, nil otherwise using
@@ -1736,7 +1738,7 @@ func (r *rocksDBBatch) ExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
     panic("unimplemented")
diff --git a/pkg/storage/engine/tee.go b/pkg/storage/engine/tee.go
index 1095ac2cfa2d..82c55d418ecf 100644
--- a/pkg/storage/engine/tee.go
+++ b/pkg/storage/engine/tee.go
@@ -89,11 +89,11 @@ func (t *TeeEngine) ExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
-    eng1Sst, bulkOpSummary, resume1, err := t.eng1.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
-    rocksSst, _, resume2, err2 := t.eng2.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
+    eng1Sst, bulkOpSummary, resume1, err := t.eng1.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, maxSize, io)
+    rocksSst, _, resume2, err2 := t.eng2.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, maxSize, io)
     if err = fatalOnErrorMismatch(t.ctx, err, err2); err != nil {
         return nil, bulkOpSummary, nil, err
     }
@@ -672,11 +672,11 @@ func (t *TeeEngineReader) ExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
-    sst1, bulkOpSummary, resume1, err := t.reader1.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
-    sst2, _, resume2, err2 := t.reader2.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
+    sst1, bulkOpSummary, resume1, err := t.reader1.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, maxSize, io)
+    sst2, _, resume2, err2 := t.reader2.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, maxSize, io)
     if err = fatalOnErrorMismatch(t.ctx, err, err2); err != nil {
         return nil, bulkOpSummary, nil, err
     }
@@ -776,11 +776,11 @@ func (t *TeeEngineBatch) ExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
-    sst1, bulkOpSummary, resume1, err := t.batch1.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
-    sst2, _, resume2, err2 := t.batch2.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
+    sst1, bulkOpSummary, resume1, err := t.batch1.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, maxSize, io)
+    sst2, _, resume2, err2 := t.batch2.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, maxSize, io)
     if err = fatalOnErrorMismatch(t.ctx, err, err2); err != nil {
         return nil, bulkOpSummary, nil, err
     }
diff --git a/pkg/storage/spanset/batch.go b/pkg/storage/spanset/batch.go
index 88f775cae082..949c9bb6c959 100644
--- a/pkg/storage/spanset/batch.go
+++ b/pkg/storage/spanset/batch.go
@@ -284,10 +284,10 @@ func (s spanSetReader) ExportToSst(
     startKey, endKey roachpb.Key, startTS, endTS hlc.Timestamp,
     exportAllRevisions bool,
-    targetSize uint64,
+    targetSize, maxSize uint64,
     io engine.IterOptions,
 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
-    return s.r.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, io)
+    return s.r.ExportToSst(startKey, endKey, startTS, endTS, exportAllRevisions, targetSize, maxSize, io)
 }
 
 func (s spanSetReader) Get(key engine.MVCCKey) ([]byte, error) {

From fc5133981238ecd1ae6e3645d49c9249cd119425 Mon Sep 17 00:00:00 2001
From: Rohan Yadav
Date: Wed, 29 Jan 2020 11:20:14 -0500
Subject: [PATCH 4/4] sql: bugfixes around writing the old primary key in pk changes

This PR fixes two bugs:
* The logic for when to rewrite the old primary key was broken, resulting
  in the old primary key not being rewritten in many cases.
* The copy of the old primary key being created was not properly handling
  its dangling interleave information.

Release note: None
---
 pkg/sql/alter_table.go                     | 14 ++++++-------
 .../testdata/logic_test/alter_primary_key   | 21 ++++++++++++++++++-
 pkg/sql/schema_changer_test.go              |  2 +-
 pkg/sql/sqlbase/structured.go               | 14 +++++++++++++
 4 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/pkg/sql/alter_table.go b/pkg/sql/alter_table.go
index 6c7d252eaef6..a251ff17b20b 100644
--- a/pkg/sql/alter_table.go
+++ b/pkg/sql/alter_table.go
@@ -397,17 +397,15 @@ func (n *alterTableNode) startExec(params runParams) error {
     // TODO (rohany): gate this behind a flag so it doesn't happen all the time.
     // Create a new index that indexes everything the old primary index does, but doesn't store anything.
-    // TODO (rohany): is there an easier way of checking if the existing primary index was the
-    // automatically created one?
-    if len(n.tableDesc.PrimaryIndex.ColumnNames) == 1 && n.tableDesc.PrimaryIndex.ColumnNames[0] != "rowid" {
+    if !n.tableDesc.IsPrimaryIndexDefaultRowID() {
         oldPrimaryIndexCopy := protoutil.Clone(&n.tableDesc.PrimaryIndex).(*sqlbase.IndexDescriptor)
-        name := generateUniqueConstraintName(
-            "old_primary_key",
-            nameExists,
-        )
-        oldPrimaryIndexCopy.Name = name
+        // Clear the name of the index so that it gets generated by AllocateIDs.
+        oldPrimaryIndexCopy.Name = ""
         oldPrimaryIndexCopy.StoreColumnIDs = nil
         oldPrimaryIndexCopy.StoreColumnNames = nil
+        // Make the copy of the old primary index not-interleaved. This decision
+        // can be revisited based on user experience.
+        oldPrimaryIndexCopy.Interleave = sqlbase.InterleaveDescriptor{}
         if err := addIndexMutationWithSpecificPrimaryKey(n.tableDesc, oldPrimaryIndexCopy, newPrimaryIndexDesc); err != nil {
             return err
         }
diff --git a/pkg/sql/logictest/testdata/logic_test/alter_primary_key b/pkg/sql/logictest/testdata/logic_test/alter_primary_key
index a542fe022cad..6c44f93f94f3 100644
--- a/pkg/sql/logictest/testdata/logic_test/alter_primary_key
+++ b/pkg/sql/logictest/testdata/logic_test/alter_primary_key
@@ -97,7 +97,7 @@ child  CREATE TABLE child (
        y INT8 NOT NULL,
        z INT8 NOT NULL,
        CONSTRAINT "primary" PRIMARY KEY (x ASC, y ASC, z ASC),
-       UNIQUE INDEX old_primary_key (x ASC),
+       UNIQUE INDEX child_x_key (x ASC),
        FAMILY fam_0_x_y_z (x, y, z)
 ) INTERLEAVE IN PARENT parent (x, y)
@@ -151,6 +151,7 @@ child  CREATE TABLE child (
        y INT8 NOT NULL,
        z INT8 NOT NULL,
        CONSTRAINT "primary" PRIMARY KEY (y ASC, z ASC),
+       UNIQUE INDEX child_x_y_z_key (x ASC, y ASC, z ASC),
        FAMILY fam_0_x_y_z (x, y, z)
 )
@@ -194,6 +195,7 @@ child  CREATE TABLE child (
        z INT8 NOT NULL,
        w INT8 NULL,
        CONSTRAINT "primary" PRIMARY KEY (x ASC, y ASC, z ASC),
+       UNIQUE INDEX child_x_y_key (x ASC, y ASC),
        INDEX i (x ASC, w ASC) INTERLEAVE IN PARENT parent (x),
        FAMILY fam_0_x_y_z_w (x, y, z, w)
 ) INTERLEAVE IN PARENT parent (x)
@@ -281,3 +283,20 @@ INSERT INTO t1 VALUES (100, 100, 100, 100)
 
 statement error insert on table "t4" violates foreign key constraint "fk3"
 INSERT INTO t4 VALUES (101)
+
+# Ensure that we still rewrite a primary index if the index column has name "rowid".
+statement ok
+DROP TABLE IF EXISTS t;
+CREATE TABLE t (rowid INT PRIMARY KEY, y INT NOT NULL, FAMILY (rowid, y));
+ALTER TABLE t ALTER PRIMARY KEY USING COLUMNS (y)
+
+query TT
+SHOW CREATE t
+----
+t  CREATE TABLE t (
+       rowid INT8 NOT NULL,
+       y INT8 NOT NULL,
+       CONSTRAINT "primary" PRIMARY KEY (y ASC),
+       UNIQUE INDEX t_rowid_key (rowid ASC),
+       FAMILY fam_0_rowid_y (rowid, y)
+)
diff --git a/pkg/sql/schema_changer_test.go b/pkg/sql/schema_changer_test.go
index 35f2b3c03cf1..31ce21c5e930 100644
--- a/pkg/sql/schema_changer_test.go
+++ b/pkg/sql/schema_changer_test.go
@@ -2322,7 +2322,7 @@ INSERT INTO t.test VALUES (1, 1, 1), (2, 2, 2), (3, 3, 3);
     y INT8 NOT NULL,
     z INT8 NULL,
     CONSTRAINT "primary" PRIMARY KEY (y ASC),
-    UNIQUE INDEX old_primary_key (x ASC),
+    UNIQUE INDEX test_x_key (x ASC),
     INDEX i (z ASC),
     FAMILY "primary" (x, y, z)
 )`
diff --git a/pkg/sql/sqlbase/structured.go b/pkg/sql/sqlbase/structured.go
index 0bd5563f3aa9..69aaf75f5d28 100644
--- a/pkg/sql/sqlbase/structured.go
+++ b/pkg/sql/sqlbase/structured.go
@@ -2808,6 +2808,20 @@ func (desc *TableDescriptor) IsInterleaved() bool {
     return false
 }
 
+// IsPrimaryIndexDefaultRowID returns whether or not the table's primary
+// index is the default primary key on the hidden rowid column.
+func (desc *TableDescriptor) IsPrimaryIndexDefaultRowID() bool {
+    if len(desc.PrimaryIndex.ColumnIDs) != 1 {
+        return false
+    }
+    col, err := desc.FindColumnByID(desc.PrimaryIndex.ColumnIDs[0])
+    if err != nil {
+        // Should never be in this case.
+        panic(err)
+    }
+    return col.Hidden && col.Name == "rowid"
+}
+
 // MakeMutationComplete updates the descriptor upon completion of a mutation.
 // There are three Validity types for the mutations:
 // Validated - The constraint has already been added and validated, should