diff --git a/ethdb/AbstractKV.md b/ethdb/AbstractKV.md deleted file mode 100644 index 87f3d9cba84..00000000000 --- a/ethdb/AbstractKV.md +++ /dev/null @@ -1,232 +0,0 @@ -## Target: - -To build 1 key-value abstraction on top of LMDB and RemoteKV (our own read-only TCP protocol for key-value databases). - -## Design principles: -- No internal copies/allocations. It means app must copy keys/values before put to database. -- Low-level API: as close to original LMDB as possible. -- Expose concept of transaction - app-level code can Begin/Commit/Rollback -- If your are not familiar with "DupSort" concept, please read [indices.md](./../docs/programmers_guide/indices.md) first. - -## Result interface: - -``` -// ethdb/kv_abstract.go - -// KV low-level database interface - main target is - to provide common abstraction over top of LMDB and RemoteKV. -// -// Common pattern for short-living transactions: -// -// if err := db.View(ctx, func(tx ethdb.Tx) error { -// ... code which uses database in transaction -// }); err != nil { -// return err -// } -// -// Common pattern for long-living transactions: -// tx, err := db.Begin(true) -// if err != nil { -// return err -// } -// defer tx.Rollback() -// -// ... code which uses database in transaction -// -// err := tx.Commit() -// if err != nil { -// return err -// } -// -type KV interface { - View(ctx context.Context, f func(tx Tx) error) error - Update(ctx context.Context, f func(tx Tx) error) error - Close() - - // Begin - creates transaction - // tx may be discarded by .Rollback() method - // - // A transaction and its cursors must only be used by a single - // thread (not goroutine), and a thread may only have a single transaction at a time. - // It happen automatically by - because this method calls runtime.LockOSThread() inside (Rollback/Commit releases it) - // By this reason application code can't call runtime.UnlockOSThread() - it leads to undefined behavior. 
- // - // If this `parent` is non-NULL, the new transaction - // will be a nested transaction, with the transaction indicated by parent - // as its parent. Transactions may be nested to any level. A parent - // transaction and its cursors may not issue any other operations than - // Commit and Rollback while it has active child transactions. - Begin(ctx context.Context, parent Tx, writable bool) (Tx, error) - AllBuckets() dbutils.BucketsCfg -} - -type Tx interface { - // Cursor - creates cursor object on top of given bucket. Type of cursor - depends on bucket configuration. - // If bucket was created with lmdb.DupSort flag, then cursor with interface CursorDupSort created - // If bucket was created with lmdb.DupFixed flag, then cursor with interface CursorDupFixed created - // Otherwise - object of interface Cursor created - // - // Cursor, also provides a grain of magic - it can use a declarative configuration - and automatically break - // long keys into DupSort key/values. See docs for `bucket.go:BucketConfigItem` - Cursor(bucket string) Cursor - CursorDupSort(bucket string) CursorDupSort // CursorDupSort - can be used if bucket has lmdb.DupSort flag - CursorDupFixed(bucket string) CursorDupFixed // CursorDupSort - can be used if bucket has lmdb.DupFixed flag - Get(bucket string, key []byte) (val []byte, err error) - - Commit(ctx context.Context) error // Commit all the operations of a transaction into the database. - Rollback() // Rollback - abandon all the operations of the transaction instead of saving them. 
- - BucketSize(name string) (uint64, error) -} - -// Interface used for buckets migration, don't use it in usual app code -type BucketMigrator interface { - DropBucket(string) error - CreateBucket(string) error - ExistsBucket(string) bool - ClearBucket(string) error - ExistingBuckets() ([]string, error) -} - -// Cursor - class for navigating through a database -// CursorDupSort and CursorDupFixed are inherit this class -// -// If methods (like First/Next/Seek) return error, then returned key SHOULD not be nil (can be []byte{} for example). -// Then looping code will look as: -// c := kv.Cursor(bucketName) -// for k, v, err := c.First(); k != nil; k, v, err = c.Next() { -// if err != nil { -// return err -// } -// ... logic -// } -type Cursor interface { - Prefix(v []byte) Cursor // Prefix returns only keys with given prefix, useful RemoteKV - because filtering done by server - Prefetch(v uint) Cursor // Prefetch enables data streaming - used only by RemoteKV - - First() ([]byte, []byte, error) // First - position at first key/data item - Seek(seek []byte) ([]byte, []byte, error) // Seek - position at first key greater than or equal to specified key - SeekExact(key []byte) ([]byte, error) // SeekExact - position at first key greater than or equal to specified key - Next() ([]byte, []byte, error) // Next - position at next key/value (can iterate over DupSort key/values automatically) - Prev() ([]byte, []byte, error) // Prev - position at previous key - Last() ([]byte, []byte, error) // Last - position at last key and last possible value - Current() ([]byte, []byte, error) // Current - return key/data at current cursor position - - Put(k, v []byte) error // Put - based on order - Append(k []byte, v []byte) error // Append - append the given key/data pair to the end of the database. This option allows fast bulk loading when keys are already known to be in the correct order. 
- Delete(key []byte) error - - // DeleteCurrent This function deletes the key/data pair to which the cursor refers. - // This does not invalidate the cursor, so operations such as MDB_NEXT - // can still be used on it. - // Both MDB_NEXT and MDB_GET_CURRENT will return the same record after - // this operation. - DeleteCurrent() error - - // PutNoOverwrite(key, value []byte) error - // Reserve() - - // PutCurrent - replace the item at the current cursor position. - // The key parameter must still be provided, and must match it. - // If using sorted duplicates (#MDB_DUPSORT) the data item must still - // sort into the same place. This is intended to be used when the - // new data is the same size as the old. Otherwise it will simply - // perform a delete of the old record followed by an insert. - PutCurrent(key, value []byte) error -} - -type CursorDupSort interface { - Cursor - - SeekBothExact(key, value []byte) ([]byte, []byte, error) - SeekBothRange(key, value []byte) ([]byte, []byte, error) - FirstDup() ([]byte, error) // FirstDup - position at first data item of current key - NextDup() ([]byte, []byte, error) // NextDup - position at next data item of current key - NextNoDup() ([]byte, []byte, error) // NextNoDup - position at first data item of next key - LastDup() ([]byte, error) // LastDup - position at last data item of current key - - CountDuplicates() (uint64, error) // CountDuplicates - number of duplicates for the current key - DeleteCurrentDuplicates() error // DeleteCurrentDuplicates - deletes all of the data items for the current key - AppendDup(key, value []byte) error // AppendDup - same as Append, but for sorted dup data - - //PutIfNoDup() // Store the key-value pair only if key is not present -} - -// CursorDupFixed - has methods valid for buckets with lmdb.DupFixed flag -// See also lmdb.WrapMulti -type CursorDupFixed interface { - CursorDupSort - - // GetMulti - return up to a page of duplicate data items from current cursor position - // After 
return - move cursor to prepare for #MDB_NEXT_MULTIPLE - GetMulti() ([]byte, error) - // NextMulti - return up to a page of duplicate data items from next cursor position - // After return - move cursor to prepare for #MDB_NEXT_MULTIPLE - NextMulti() ([]byte, []byte, error) - // PutMulti store multiple contiguous data elements in a single request. - // Panics if len(page) is not a multiple of stride. - // The cursor's bucket must be DupFixed and DupSort. - PutMulti(key []byte, page []byte, stride int) error - // ReserveMulti() -} - -type HasStats interface { - DiskSize(context.Context) (uint64, error) // db size -} -``` - -## Rationale and Features list: - -#### Buckets concept: -- Bucket is an interface, can’t be nil, can't return error - -#### InMemory, ReadOnly, MultipleDatabases, Customization: -- `NewLMDB().InMem().ReadOnly().Open()` -- `NewLMDB().Path(path).WithBucketsConfig(config).Open()` - -#### Context: -- For transactions - yes -- For .First() and .Next() methods - no - -#### Cursor/Iterator: -- Cursor is an interface, can’t be nil. `db.Cursor()` can't return error -- `cursor.Prefix(prefix)` filtering keys by given prefix. RemoteKV - to support server side filtering. -- `cursor.Prefetch(1000)` - useful for Remote -- No Lazy values -- Methods .First, .Next, .Seek - can return error. -If err!=nil then key SHOULD be !=nil (can be []byte{} for example). -Then looping code will look as: -```go -for k, v, err := c.First(); k != nil; k, v, err = c.Next() { - if err != nil { - return err - } - // logic -} -``` - -#### Managed/un-managed transactions -- Tx is an interface -- db.Update, db.View - yes -- db.Batch - no -- all keys and values returned by all method are valid until end of transaction -- transaction object can be used only withing 1 goroutine -- it's safe to call .Rollback() after .Commit(), multiple rollbacks are also safe. Common transaction patter: -``` -tx, err := db.Begin(true) -if err != nil { - return err -} -defer tx.Rollback() - -// ... 
code which uses database in transaction - -err := tx.Commit() -if err != nil { - return err -} -``` - -## Not covered by Abstractions: -- TTL of keys -- Nested Buckets -- Backups diff --git a/ethdb/Readme.md b/ethdb/Readme.md new file mode 100644 index 00000000000..c2d5e906ea4 --- /dev/null +++ b/ethdb/Readme.md @@ -0,0 +1,130 @@ +#### `Ethdb` package holds a bouquet of objects to access DB + +Words "KV" and "DB" have special meaning here: +- KV - key-value-style API to access data: lets the developer manage transactions and stateful cursors. +- DB - object-oriented-style API to access data: Get/Put/Delete/WalkOverTable/MultiPut, managing transactions internally. + +So, the DB abstraction fits 95% of the time and leads to more maintainable code - because it looks stateless. + +About "key-value-style": Modern key-value databases don't provide Get/Put/Delete methods, + because they are very hard-drive-unfriendly - they push developers to do random-disk-access, which is an [order of magnitude slower than sequential read](https://www.seagate.com/sg/en/tech-insights/lies-damn-lies-and-ssd-benchmark-master-ti/). + To enforce sequential reads, stateful cursors/iterators were introduced - they intentionally look like a file API: open_cursor/seek/write_data_from_current_position/move_to_end/step_back/step_forward/delete_key_on_current_position/append. + +## Class diagram: + +```asciiflow.com +// This is not a call graph; it just shows classes from low-level to high-level. +// It also shows which classes satisfy which interfaces. 
+ ++-----------------------------------+ +-----------------------------------+ +-----------------------------------+ +| github.com/ledgerwatch/lmdb-go | | github.com/torquem-ch/mdbx-go | | google.golang.org/grpc.ClientConn | +| (app-agnostic LMDB go bindings) | | (app-agnostic MDBX go bindings) | | (app-agnostic RPC and streaming) | ++-----------------------------------+ +-----------------------------------+ +-----------------------------------+ + | | | + | | | + v v v ++-----------------------------------+ +-----------------------------------+ +-----------------------------------+ +| ethdb/kv_lmdb.go | | ethdb/kv_mdbx.go | | ethdb/kv_remote.go | +| (tg-specific LMDB implementation) | | (tg-specific MDBX implementation) | | (tg-specific remote DB access) | ++-----------------------------------+ +-----------------------------------+ +-----------------------------------+ + | | | + | | | + v v v + +----------------------------------------------------------------------------------------------+ + | ethdb/kv_abstract.go | + | (Common KV interface. DB-friendly, disk-friendly, cpu-cache-friendly. | + | Same app code can work with local or remote database. | + | Allows experimenting with other database implementations. | + | Supports context.Context for cancellation. 
Any operation can return error) | + +----------------------------------------------------------------------------------------------+ + | | | + | | | + v v v ++-----------------------------------+ +-----------------------------------+ +-----------------------------------+ +| ethdb/object_db.go | | ethdb/tx_db.go | | ethdb/remote/remotedbserver | +| (thread-safe, stateless, | | (non-thread-safe, more performant | | (grpc server, using kv_abstract, | +| opens/closes short transactions | | than object_db, method Begin | | kv_remote calls this server, 1 | +| internally when needed) | | DOESN'T create new TxDb object) | | transaction maps on 1 grpc stream) | ++-----------------------------------+ +-----------------------------------+ +-----------------------------------+ + | | + | | + v v + +-----------------------------------------------------------------------------------------------+ + | ethdb/interface.go | + | (Common DB interfaces. ethdb.Database and ethdb.DbWithPendingMutations are widely used) | + +-----------------------------------------------------------------------------------------------+ + | + | + v ++--------------------------------------------------+ +| ethdb/mutation.go | +| (also known as "batch", recording all writes and | +| then flush to DB in sorted way only when call | +| .Commit(), use it to avoid random-writes. | +| It uses and satisfies ethdb.Database at the same time) | ++--------------------------------------------------+ + +``` + + +## ethdb.AbstractKV design: + +- InMemory, ReadOnly: `NewLMDB().InMem().ReadOnly().Open()` +- MultipleDatabases, Customization: `NewLMDB().Path(path).WithBucketsConfig(config).Open()` + + +- 1 Transaction object can be used only within 1 goroutine. +- Only 1 write transaction can be active at a time (others will wait). +- Unlimited read transactions can be active concurrently (not blocked by write transaction). + + +- Methods db.Update, db.View - can be used to open and close short transactions. 
+- Methods Begin/Commit/Rollback - for long transactions. +- it's safe to call .Rollback() after .Commit(), multiple rollbacks are also safe. Common transaction pattern: +``` +tx, err := db.Begin(ctx, nil, ethdb.RW) +if err != nil { + return err +} +defer tx.Rollback() // important to avoid transaction leaks on panic or early return + +// ... code which uses database in transaction + +err = tx.Commit() +if err != nil { + return err +} +``` + + +- No internal copies/allocations. It means: 1. app must copy keys/values before putting them into the database. 2. Data read from db is valid only during the current transaction - copy it if you plan to use it after the transaction's Commit/Rollback. +- Methods .Bucket() and .Cursor(), can’t return nil, can't return error. +- Bucket and Cursor are interfaces, which means different classes can satisfy them: for example `LmdbCursor`, `LmdbDupSortCursor`, `LmdbDupFixedCursor` classes satisfy them. + If you are not familiar with the "DupSort" concept, please read [indices.md](./../docs/programmers_guide/indices.md) first. + + +- If Cursor returns err!=nil then key SHOULD be != nil (can be []byte{} for example). +Then traversal code looks like: +```go +for k, v, err := c.First(); k != nil; k, v, err = c.Next() { + if err != nil { + return err + } + // logic +} +``` +- Move cursor: `cursor.Seek(key)` + + + +## ethdb.Database design: + +- Allows passing multiple implementations +- Allows traversing tables with `db.Walk` and `db.MultiWalk` + +## ethdb.TxDb design: +- holds inside 1 long-running transaction and 1 cursor per table +- method Begin DOESN'T create new TxDb object, it means this object can be passed into other objects by pointer, + and high-level app code can start/commit transactions when it needs without re-creating all objects which hold + TxDb pointer. +- This is the reason why the txDb.CommitAndBegin() method works: it creates a new transaction object inside, while the pointer to TxDb stays valid.