db: move size check for remote files to table stats job
This change moves the Size() check for remote files to the
asynchronous table stats loading job instead of the previous
synchronous check during Open(). This greatly reduces the time
it takes to open Pebble and make a node live if the store had
a lot of remote files before the restart.
itsbilal committed Nov 1, 2023
1 parent 424019f commit 86ec1c4
Showing 4 changed files with 84 additions and 8 deletions.
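
For context, the pattern is roughly: Open() continues to verify local file sizes synchronously but skips remote objects, and the asynchronous table stats job later looks up each remote object's size and compares it against the size recorded in the MANIFEST. Below is a minimal, self-contained Go sketch of that split; all names and types in it (tableObject, sizeFn, checkConsistencyAtOpen, checkRemoteSizesAsync) are hypothetical stand-ins, not Pebble APIs; the real implementation is in the open.go and table_stats.go diffs that follow.

package main

import (
	"fmt"
	"log"
)

// tableObject is a hypothetical stand-in for a table known to the MANIFEST.
type tableObject struct {
	fileNum      int
	isRemote     bool
	manifestSize int64 // size recorded in the MANIFEST
}

// sizeFn stands in for the object provider's Size() call, which may involve
// a slow remote round trip.
type sizeFn func(fileNum int) (int64, error)

// checkConsistencyAtOpen verifies local file sizes synchronously and skips
// remote objects so that opening stays fast.
func checkConsistencyAtOpen(objs []tableObject, size sizeFn) error {
	for _, o := range objs {
		if o.isRemote {
			continue // deferred to the asynchronous pass below
		}
		actual, err := size(o.fileNum)
		if err != nil {
			return err
		}
		if actual != o.manifestSize {
			return fmt.Errorf("file %d: size mismatch: %d != %d", o.fileNum, actual, o.manifestSize)
		}
	}
	return nil
}

// checkRemoteSizesAsync performs the comparison that was skipped at open
// time; in the real change this happens inside the table stats loading job.
func checkRemoteSizesAsync(objs []tableObject, size sizeFn) {
	for _, o := range objs {
		if !o.isRemote {
			continue
		}
		actual, err := size(o.fileNum)
		if err != nil {
			log.Printf("background error: %v", err) // retried on a later pass
			continue
		}
		if actual != o.manifestSize {
			log.Fatalf("file %d: remote size mismatch: %d != %d", o.fileNum, actual, o.manifestSize)
		}
	}
}

func main() {
	sizes := map[int]int64{1: 100, 2: 200}
	size := func(fileNum int) (int64, error) { return sizes[fileNum], nil }
	objs := []tableObject{
		{fileNum: 1, isRemote: false, manifestSize: 100},
		{fileNum: 2, isRemote: true, manifestSize: 200},
	}
	if err := checkConsistencyAtOpen(objs, size); err != nil {
		log.Fatal(err)
	}
	checkRemoteSizesAsync(objs, size) // later, off the open path
	fmt.Println("all sizes verified")
}

In the actual change, shared-foreign objects are additionally excluded from the comparison because the size stored in their FileBacking covers only the portion of the object referenced by this Pebble instance, not the whole object.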
27 changes: 25 additions & 2 deletions ingest_test.go
@@ -804,6 +804,7 @@ func testIngestSharedImpl(
}()
creatorIDCounter := uint64(1)
replicateCounter := 1
var opts1, opts2 *Options

reset := func() {
for _, e := range efos {
@@ -822,7 +823,7 @@ func testIngestSharedImpl(
mem2 := vfs.NewMem()
require.NoError(t, mem1.MkdirAll("ext", 0755))
require.NoError(t, mem2.MkdirAll("ext", 0755))
opts1 := &Options{
opts1 = &Options{
Comparer: testkeys.Comparer,
FS: mem1,
LBaseMaxBytes: 1,
@@ -843,7 +844,7 @@
// delete-only compactions triggered by ingesting range tombstones.
opts1.DisableAutomaticCompactions = true

opts2 := &Options{}
opts2 = &Options{}
*opts2 = *opts1
opts2.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{
"": sstorage,
@@ -867,6 +868,28 @@

datadriven.RunTest(t, fmt.Sprintf("testdata/%s", fileName), func(t *testing.T, td *datadriven.TestData) string {
switch td.Cmd {
case "restart":
for _, e := range efos {
require.NoError(t, e.Close())
}
if d1 != nil {
require.NoError(t, d1.Close())
}
if d2 != nil {
require.NoError(t, d2.Close())
}

var err error
d1, err = Open("", opts1)
if err != nil {
return err.Error()
}
d2, err = Open("", opts2)
if err != nil {
return err.Error()
}
d = d1
return "ok, note that the active db has been set to 1 (use 'switch' to change)"
case "reset":
reset()
return ""
7 changes: 3 additions & 4 deletions open.go
@@ -1141,13 +1141,12 @@ func checkConsistency(v *manifest.Version, dirname string, objProvider objstorag
dedup[backingState.DiskFileNum] = struct{}{}
fileNum := backingState.DiskFileNum
fileSize := backingState.Size
// We allow foreign objects to have a mismatch between sizes. This is
// because we might skew the backing size stored by our objprovider
// to prevent us from over-prioritizing this file for compaction.
// We skip over remote objects; those are instead checked asynchronously
// by the table stats loading job.
meta, err := objProvider.Lookup(base.FileTypeTable, fileNum)
var size int64
if err == nil {
if objProvider.IsSharedForeign(meta) {
if meta.IsRemote() {
continue
}
size, err = objProvider.Size(meta)
37 changes: 37 additions & 0 deletions table_stats.go
@@ -8,6 +8,7 @@ import (
"fmt"
"math"

"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/keyspan"
"github.com/cockroachdb/pebble/internal/manifest"
@@ -214,6 +215,7 @@ func (d *DB) scanReadStateTableStats(
) ([]collectedStats, []deleteCompactionHint, bool) {
moreRemain := false
var hints []deleteCompactionHint
sizesChecked := make(map[base.DiskFileNum]struct{})
for l, levelMetadata := range rs.current.Levels {
iter := levelMetadata.Iter()
for f := iter.First(); f != nil; f = iter.Next() {
@@ -236,6 +238,41 @@
return fill, hints, moreRemain
}

// If the file is remote and not SharedForeign, we should check if its size
// matches. This is because checkConsistency skips over remote files.
// SharedForeign files are skipped as their sizes are allowed to have a
// mismatch; the size stored in the FileBacking is just the part of the
// file that is referenced by this Pebble instance, not the size of the
// whole object.
objMeta, err := d.objProvider.Lookup(fileTypeTable, f.FileBacking.DiskFileNum)
if err != nil {
// Set `moreRemain` so we'll try again.
moreRemain = true
d.opts.EventListener.BackgroundError(err)
continue
}
if _, ok := sizesChecked[f.FileBacking.DiskFileNum]; !ok && objMeta.IsRemote() &&
!d.objProvider.IsSharedForeign(objMeta) {

size, err := d.objProvider.Size(objMeta)
fileSize := f.FileBacking.Size
if err != nil {
moreRemain = true
d.opts.EventListener.BackgroundError(err)
continue
}
if size != int64(fileSize) {
err := errors.Errorf(
"during consistency check in loadTableStats: L%d: %s: object size mismatch (%s): %d (provider) != %d (MANIFEST)",
errors.Safe(l), f.FileNum, d.objProvider.Path(objMeta),
errors.Safe(size), errors.Safe(fileSize))
d.opts.EventListener.BackgroundError(err)
d.opts.Logger.Fatalf("%s", err)
}

sizesChecked[f.FileBacking.DiskFileNum] = struct{}{}
}

stats, newHints, err := d.loadTableStats(
rs.current, l, f,
)
21 changes: 19 additions & 2 deletions testdata/ingest_shared
@@ -229,9 +229,9 @@ replicate 2 1 a z
----
replicated 3 shared SSTs

switch 1
restart
----
ok
ok, note that the active db has been set to 1 (use 'switch' to change)

iter
first
@@ -635,6 +635,23 @@ b@5: (foo, .)
ff: (notdeleted, .)
.

restart
----
ok, note that the active db has been set to 1 (use 'switch' to change)

switch 2
----
ok

iter
seek-ge b
next
next
----
b@5: (foo, .)
ff: (notdeleted, .)
.

# Same as above, but with a truncated range key instead of a truncated range del.

reset
