From 68ce44ccd56b93ed01b2edb02e24c32d4ae3f535 Mon Sep 17 00:00:00 2001 From: Bilal Akhtar Date: Mon, 9 Jan 2023 19:08:52 -0500 Subject: [PATCH] *: Add objstorage.shared.Storage interface for storing sstables This change adds a objstorage.shared.Storage interface that can be implemented by blob storage drivers. The objstorage.Provider coming in #2267 will largely be responsible for storing state of files' locations (i.e. shared or local), and calling into Storage as necessary. --- objstorage/shared/storage.go | 52 ++++++++++++++++++++++++++++++++++++ options.go | 12 +++++++++ 2 files changed, 64 insertions(+) create mode 100644 objstorage/shared/storage.go diff --git a/objstorage/shared/storage.go b/objstorage/shared/storage.go new file mode 100644 index 0000000000..6702e7d2fc --- /dev/null +++ b/objstorage/shared/storage.go @@ -0,0 +1,52 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package shared + +import "io" + +// Storage is an interface for a blob storage driver. This is lower-level +// than an FS-like interface; however, FS/File-like abstractions can be built on +// top of these methods. +// +// TODO(bilal): Consider pushing shared file obsoletion as well as path +// generation behind this interface. +type Storage interface { + io.Closer + + // ReadObjectAt returns a Reader for reading the object at the requested name + // and offset. NOTE(review): the int64 result is not documented here; presumably it is the object's total size -- confirm. + ReadObjectAt(basename string, offset int64) (io.ReadCloser, int64, error) + + // CreateObject returns a writer for the object at the requested name. A new + // empty object is created if CreateObject is called on an existing object. 
+ // + // A Writer *must* be closed via Close, and if closing returns a + // non-nil error, that error should be handled or reported to the user -- an + // implementation may buffer written data until Close and only then return + // an error, or Write may return an opaque io.EOF with the underlying cause + // returned by the subsequent Close(). + CreateObject(basename string) (io.WriteCloser, error) + + // List enumerates files within the supplied prefix, returning a list of + // objects within that prefix. If delimiter is non-empty, names which have the + // same prefix, prior to the delimiter but after the prefix, are grouped into a + // single result which is that prefix. The order in which results are returned is + // undefined. If a prefix is specified, the prefix is trimmed from the result + // list. NOTE(review): unlike the other methods, List has no error result, so a failed listing cannot be reported to the caller. + // + // An example would be, if the storage contains objects a, b/4, b/5 and b/6, + // these would be the return values: + // List("", "") -> ["a", "b/4", "b/5", "b/6"] + // List("", "/") -> ["a", "b"] + // List("b", "/") -> ["4", "5", "6"] + // List("b", "") -> ["/4", "/5", "/6"] + List(prefix, delimiter string) []string + + // Delete removes the named object from the store. + Delete(basename string) error + + // Size returns the length of the named object in bytes. + Size(basename string) (int64, error) +} diff --git a/options.go b/options.go index c5016b06f3..fa8f24effd 100644 --- a/options.go +++ b/options.go @@ -624,6 +624,18 @@ type Options struct { // https://github.com/cockroachdb/pebble/issues/2292 and // https://github.com/cockroachdb/pebble/issues/2266 are closed. IngestSSTablesAsFlushable bool + + // SharedStorage is a second FS-like storage medium that can be shared + // between multiple Pebble instances. It is used to store sstables only, and + // is managed by objstorage.Provider. 
Each sstable might only be written to + // by one Pebble instance, but other Pebble instances can possibly read the + // same files if they have the path to get to them. The Pebble instance that + // wrote a file should not delete it if other Pebble instances are known to + // be reading this file. This FS is expected to have slower read/write + // performance than the default FS above. + // + // TODO(bilal): Uncomment this once it's in use. + // SharedStorage shared.Storage } // Filters is a map from filter policy name to filter policy. It is used for