Skip to content

Commit

Permalink
cmd: enable benchmarking shared storage including the secondary cache
Browse files Browse the repository at this point in the history
This commit makes it possible to run commands like 'pebble bench ycsb' against
shared storage & the secondary cache. To do this, I have introduced a version
of shared.Storage that is backed by local disk, as opposed to memory or blob
storage. This enables running too-large-for-memory benchmarks from a single
machine without access to blob storage. One reason this is convenient is that
today the code to communicate with blob storage is in the cockroachdb repo, not
the pebble repo.

The concrete benchmarking plan I have in mind is worth mentioning as
motivation. I will measure the maximum ycsb thruput of shared storage without
a secondary cache and shared storage with a secondary cache. Since shared
objects will be stored on a medium as fast as the secondary cache (both will be
on local disk), we expect better thruput without the secondary cache. With that
said, we hope the thruput difference is not large, as for the secondary cache
to be effective, it must make efficient use of the local disk it is backed by.
  • Loading branch information
joshimhoff committed Jul 13, 2023
1 parent 4606eaf commit 02413ad
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 9 deletions.
18 changes: 18 additions & 0 deletions cmd/pebble/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"github.com/cockroachdb/pebble"
"github.com/cockroachdb/pebble/bloom"
"github.com/cockroachdb/pebble/internal/bytealloc"
"github.com/cockroachdb/pebble/objstorage/shared"
"github.com/cockroachdb/pebble/vfs"
)

// DB specifies the minimal interfaces that need to be implemented to support
Expand Down Expand Up @@ -98,10 +100,26 @@ func newPebbleDB(dir string) DB {
opts.EventListener.WALDeleted = nil
}

if pathToLocalSharedStorage != "" {
opts.Experimental.SharedStorage = shared.MakeSimpleFactory(map[shared.Locator]shared.Storage{
// Store all shared objects on local disk, for convenience.
"": shared.NewLocalFS(pathToLocalSharedStorage, vfs.Default),
})
opts.Experimental.CreateOnShared = true
if secondaryCacheSize != 0 {
opts.Experimental.SecondaryCacheSize = secondaryCacheSize
}
}

p, err := pebble.Open(dir, opts)
if err != nil {
log.Fatal(err)
}
if pathToLocalSharedStorage != "" {
if err := p.SetCreatorID(1); err != nil {
log.Fatal(err)
}
}
return pebbleDB{
d: p,
ballast: make([]byte, 1<<30),
Expand Down
26 changes: 17 additions & 9 deletions cmd/pebble/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,19 @@ import (
)

var (
cacheSize int64
concurrency int
disableWAL bool
duration time.Duration
maxSize uint64
maxOpsPerSec = newRateFlag("")
verbose bool
waitCompactions bool
wipe bool
cacheSize int64
concurrency int
disableWAL bool
duration time.Duration
maxSize uint64
maxOpsPerSec = newRateFlag("")
verbose bool
waitCompactions bool
wipe bool
pathToLocalSharedStorage string
// If zero, or if pathToLocalSharedStorage is empty (shared storage
// disabled), the secondary cache is not used.
secondaryCacheSize int64
)

func main() {
Expand Down Expand Up @@ -59,6 +63,10 @@ func main() {
for _, cmd := range []*cobra.Command{replayCmd, scanCmd, syncCmd, tombstoneCmd, writeBenchCmd, ycsbCmd} {
cmd.Flags().BoolVarP(
&verbose, "verbose", "v", false, "enable verbose event logging")
cmd.Flags().StringVar(
&pathToLocalSharedStorage, "shared-storage", "", "path to local shared storage (empty for no shared storage)")
cmd.Flags().Int64Var(
&secondaryCacheSize, "secondary-cache", 0, "secondary cache size in bytes")
}
for _, cmd := range []*cobra.Command{scanCmd, syncCmd, tombstoneCmd, ycsbCmd} {
cmd.Flags().Int64Var(
Expand Down
118 changes: 118 additions & 0 deletions objstorage/shared/localfs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package shared

import (
	"context"
	"errors"
	"io"
	"os"
	"path"

	"github.com/cockroachdb/pebble/vfs"
)

// NewLocalFS constructs a shared.Storage implementation (for testing) that
// is backed by the given vfs.FS. Every object is kept as a file whose path is
// formed by joining dirname with the object's name.
func NewLocalFS(dirname string, fs vfs.FS) Storage {
	return &localFSStore{dirname: dirname, vfs: fs}
}

// localFSStore is a vfs-backed implementation of the shared.Storage
// interface (for testing). Each object is a file whose path is
// path.Join(dirname, objName).
type localFSStore struct {
// dirname is the directory that object names are joined onto.
dirname string
// vfs is the filesystem used for every open, create and remove call made
// by the store.
vfs vfs.FS
}

// Compile-time assertion that *localFSStore implements Storage.
var _ Storage = (*localFSStore)(nil)

// Close is part of the shared.Storage interface. It resets the store to its
// zero value by clearing the directory name and the filesystem handle. It
// always returns nil.
func (s *localFSStore) Close() error {
	s.dirname = ""
	s.vfs = nil
	return nil
}

// ReadObject is part of the shared.Storage interface. It opens the file that
// backs objName (path.Join of the store's directory and objName) and returns
// a reader over it together with the file's size in bytes as reported by
// Stat.
//
// ctx is not consulted by this implementation. On any error the returned
// reader is nil and the size is 0.
func (s *localFSStore) ReadObject(
	ctx context.Context, objName string,
) (_ ObjectReader, objSize int64, _ error) {
	f, err := s.vfs.Open(path.Join(s.dirname, objName))
	if err != nil {
		return nil, 0, err
	}
	stat, err := f.Stat()
	if err != nil {
		// The caller never receives f on this path, so it must be closed
		// here or the open file is leaked. The Stat error is the one worth
		// reporting, so a secondary Close error is intentionally dropped.
		_ = f.Close()
		return nil, 0, err
	}

	return &localFSReader{f}, stat.Size(), nil
}

// localFSReader is the ObjectReader returned by localFSStore.ReadObject. It
// wraps the open file that backs a single object.
type localFSReader struct {
// file is the open file being read; Close sets it to nil.
file vfs.File
}

// Compile-time assertion that *localFSReader implements ObjectReader.
var _ ObjectReader = (*localFSReader)(nil)

// ReadAt is part of the shared.ObjectReader interface. It reads into p from
// the underlying file starting at offset. The context argument is ignored.
func (r *localFSReader) ReadAt(_ context.Context, p []byte, offset int64) error {
	read, readErr := r.file.ReadAt(p, offset)
	// io.ReaderAt implementations may return io.EOF together with a fully
	// populated buffer (https://pkg.go.dev/io#ReaderAt); that is a
	// successful read for our purposes.
	if read == len(p) && readErr == io.EOF {
		return nil
	}
	return readErr
}

// Close is part of the shared.ObjectReader interface. It closes the
// underlying file, clears the reader's reference to it, and returns whatever
// error the file's Close reported (previously that error was discarded).
func (r *localFSReader) Close() error {
	err := r.file.Close()
	// Drop the reference regardless of the outcome so the reader does not
	// hold on to a closed file.
	r.file = nil
	return err
}

// CreateObject is part of the shared.Storage interface. It creates the file
// that backs objName under the store's directory and hands it back as the
// object's writer, along with any error from the filesystem.
func (s *localFSStore) CreateObject(objName string) (io.WriteCloser, error) {
	objPath := path.Join(s.dirname, objName)
	return s.vfs.Create(objPath)
}

// List is part of the shared.Storage interface. This implementation ignores
// both arguments and always reports an empty (nil) listing with no error.
func (s *localFSStore) List(prefix, delimiter string) ([]string, error) {
	// TODO(josh): localfs.go exists to support running 'pebble bench', which
	// implies a single pebble instance. In that setting List can always
	// return <nil, nil>, since that indicates a file has only one ref. See
	// https://github.com/cockroachdb/pebble/blob/a9a079d4fb6bf4a9ebc52e4d83a76ad4cbf676cb/objstorage/objstorageprovider/shared.go#L292
	var names []string
	return names, nil
}

// Delete is part of the shared.Storage interface. It removes the file that
// backs objName from the store's directory and returns the filesystem's
// error, if any.
func (s *localFSStore) Delete(objName string) error {
	objPath := path.Join(s.dirname, objName)
	return s.vfs.Remove(objPath)
}

// Size is part of the shared.Storage interface. It opens the file that backs
// objName, reads its size in bytes via Stat, and closes the file again
// before returning. On error the returned size is 0.
func (s *localFSStore) Size(objName string) (int64, error) {
	file, openErr := s.vfs.Open(path.Join(s.dirname, objName))
	if openErr != nil {
		return 0, openErr
	}
	defer file.Close()

	info, statErr := file.Stat()
	if statErr != nil {
		return 0, statErr
	}
	return info.Size(), nil
}

// IsNotExistError is part of the shared.Storage interface. It reports
// whether err is, or wraps, os.ErrNotExist.
func (s *localFSStore) IsNotExistError(err error) bool {
	// Filesystem calls report a missing file through wrapper errors such as
	// *fs.PathError rather than the bare sentinel, so a direct == comparison
	// would not recognize them. errors.Is walks the wrap chain.
	return errors.Is(err, os.ErrNotExist)
}

0 comments on commit 02413ad

Please sign in to comment.