From b6ae081e8f8d47aa27642a44fd29956b4b142a89 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 28 Dec 2023 00:16:55 +0000 Subject: [PATCH 01/58] add dbconv draft --- cmd/dbconv/dbconv/config.go | 37 ++++++++ cmd/dbconv/dbconv/dbconv.go | 145 +++++++++++++++++++++++++++++++ cmd/dbconv/dbconv/dbconv_test.go | 92 ++++++++++++++++++++ cmd/dbconv/main.go | 15 ++++ 4 files changed, 289 insertions(+) create mode 100644 cmd/dbconv/dbconv/config.go create mode 100644 cmd/dbconv/dbconv/dbconv.go create mode 100644 cmd/dbconv/dbconv/dbconv_test.go create mode 100644 cmd/dbconv/main.go diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go new file mode 100644 index 0000000000..9a0a79c10b --- /dev/null +++ b/cmd/dbconv/dbconv/config.go @@ -0,0 +1,37 @@ +package dbconv + +import ( + flag "github.com/spf13/pflag" +) + +type DBConfig struct { + Data string `koanf:"data"` + DBEngine string `koanf:"db-engine"` + Handles int `koanf:"handles"` + Cache int `koanf:"cache"` +} + +// TODO +var DBConfigDefault = DBConfig{} + +func DBConfigAddOptions(prefix string, f *flag.FlagSet) { + // TODO + f.String(prefix+".data", DBConfigDefault.Data, "directory of stored chain state") + f.String(prefix+".db-engine", DBConfigDefault.DBEngine, "backing database implementation to use ('leveldb' or 'pebble')") + f.Int(prefix+".handles", DBConfigDefault.Handles, "number of file descriptor handles to use for the database") + f.Int(prefix+".cache", DBConfigDefault.Cache, "the capacity(in megabytes) of the data caching") +} + +type DBConvConfig struct { + Src DBConfig `koanf:"src"` + Dst DBConfig `koanf:"dst"` + Threads uint8 `koanf:"threads"` +} + +var DefaultDBConvConfig = DBConvConfig{} + +func DBConvConfigAddOptions(f *flag.FlagSet) { + DBConfigAddOptions(".src", f) + DBConfigAddOptions(".dst", f) + f.Uint8("threads", DefaultDBConvConfig.Threads, "number of threads to use (1-255, 0 = auto)") +} diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go new file mode 100644 index 0000000000..e0f8e5703e --- /dev/null +++ b/cmd/dbconv/dbconv/dbconv.go @@ -0,0 +1,145 @@ +package dbconv + +import ( + "context" + "errors" + "fmt" + "math" + "sync" + + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" +) + +type DBConverter struct { + config *DBConvConfig + + src ethdb.Database + dst ethdb.Database +} + +func NewDBConverter(config *DBConvConfig) *DBConverter { + return &DBConverter{config: config} +} + +func openDB(config *DBConfig, readonly bool) (ethdb.Database, error) { + return rawdb.Open(rawdb.OpenOptions{ + Type: config.DBEngine, + Directory: config.Data, + AncientsDirectory: "", // don't open freezer + Namespace: "", // TODO do we need metrics namespace? + Cache: config.Cache, + Handles: config.Handles, + ReadOnly: readonly, + }) +} + +func (c *DBConverter) copyEntries(ctx context.Context, prefix []byte) error { + it := c.src.NewIterator(prefix, nil) + defer it.Release() + batch := c.dst.NewBatch() + var lastKey []byte + var lastKeyInBatch []byte + for it.Next() && ctx.Err() == nil { + key, value := it.Key(), it.Value() + if err := batch.Put(key, value); err != nil { + return err + } + lastKeyInBatch = key + if batch.ValueSize() >= ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + return err + } + lastKey = lastKeyInBatch + batch.Reset() + } + } + if err := batch.Write(); err != nil { + return err + } + lastKey = lastKeyInBatch + _ = lastKey // TODO add saving info about last written key and allow restarting in case of an interruption + return nil +} + +func entriesAndShortestKey(db ethdb.Database) (entries uint, shortest int) { + shortest = math.MaxInt + it := db.NewIterator(nil, nil) + for it.Next() { + entries++ + key := it.Key() + if len(key) < shortest { + shortest = len(key) + } + } + it.Release() + return +} + +func (c *DBConverter) Convert(ctx context.Context) error { + var err error + defer c.Close() + c.src, err = openDB(&c.config.Src, true) + if err != nil { + return err + } + c.dst, err = openDB(&c.config.Dst, false) + if err != nil { + return err + } + if c.config.Threads == uint8(0) { + return errors.New("threads count can't be 0") + } + + // copy empty key entry + if has, _ := c.src.Has([]byte{}); has { + value, err := c.src.Get([]byte{}) + if err != nil { + return fmt.Errorf("Source database: failed to get value for an empty key: %w", err) + } + err = c.dst.Put([]byte{}, value) + if err != nil { + return fmt.Errorf("Destination database: failed to put value for an empty key: %w", err) + } + } + + results := make(chan error, c.config.Threads) + for i := uint8(0); i < c.config.Threads; i++ { + results <- nil + } + var wg sync.WaitGroup + for i := 0; ctx.Err() == nil && i <= 0xff; i++ { + err = <-results + if err != nil { + return err + } + prefix := []byte{byte(i)} // TODO make better prefixes, for now we are assuming that majority of keys are 32 byte hashes representing legacy trie nodes + wg.Add(1) + go func() { + results <- c.copyEntries(ctx, prefix) + wg.Done() + }() + } + wg.Wait() +drainLoop: + for { + select { + case err = <-results: + if err != nil { + return err + } + default: + break drainLoop + } + } + return nil +} + +func (c *DBConverter) Close() { + if c.src != nil { + c.src.Close() + } + if c.dst != nil { + c.dst.Close() + } +} diff --git a/cmd/dbconv/dbconv/dbconv_test.go b/cmd/dbconv/dbconv/dbconv_test.go new file mode 100644 index 0000000000..9054c49148 --- /dev/null +++ b/cmd/dbconv/dbconv/dbconv_test.go @@ -0,0 +1,92 @@ +package dbconv + +import ( + "bytes" + "context" + "testing" + + "github.com/ethereum/go-ethereum/log" + "github.com/offchainlabs/nitro/util/testhelpers" +) + +func TestConversion(t *testing.T) { + _ = testhelpers.InitTestLog(t, log.LvlTrace) + oldDBConfig := DBConfigDefault + oldDBConfig.Data = t.TempDir() + oldDBConfig.DBEngine = "leveldb" + + newDBConfig := DBConfigDefault + newDBConfig.Data = t.TempDir() + newDBConfig.DBEngine = "pebble" + + func() { + oldDb, err := openDB(&oldDBConfig, false) + Require(t, err) + defer oldDb.Close() + for i := 0; i < 0xfe; i++ { + data := []byte{byte(i)} + err = oldDb.Put(data, data) + Require(t, err) + for j := 0; j < 0xf; j++ { + data := []byte{byte(i), byte(j)} + err = oldDb.Put(data, data) + Require(t, err) + } + } + err = oldDb.Put([]byte{}, []byte{0xde, 0xed, 0xbe, 0xef}) + Require(t, err) + }() + + config := DefaultDBConvConfig + config.Src = oldDBConfig + config.Dst = newDBConfig + config.Threads = 32 + conv := NewDBConverter(&config) + defer conv.Close() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + err := conv.Convert(ctx) + Require(t, err) + conv.Close() + + oldDb, err := openDB(&oldDBConfig, true) + Require(t, err) + defer oldDb.Close() + newDb, err := openDB(&newDBConfig, true) + Require(t, err) + defer newDb.Close() + + func() { + it := oldDb.NewIterator(nil, nil) + defer it.Release() + for it.Next() { + if has, _ := newDb.Has(it.Key()); !has { + t.Log("Missing key in converted db, key:", it.Key()) + } + newValue, err := newDb.Get(it.Key()) + Require(t, err) + if !bytes.Equal(newValue, it.Value()) { + Fail(t, "Value missmatch, old:", it.Value(), "new:", newValue) + } + } + }() + func() { + it := newDb.NewIterator(nil, nil) + defer it.Release() + for it.Next() { + if has, _ := oldDb.Has(it.Key()); !has { + t.Log("Unexpected key in converted db, key:", it.Key()) + } + } + }() +} + +func Require(t *testing.T, err error, printables ...interface{}) { + t.Helper() + testhelpers.RequireImpl(t, err, printables...) +} + +func Fail(t *testing.T, printables ...interface{}) { + t.Helper() + testhelpers.FailImpl(t, printables...) +} diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go new file mode 100644 index 0000000000..7f21b3d4da --- /dev/null +++ b/cmd/dbconv/main.go @@ -0,0 +1,15 @@ +package main + +import ( + "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" + flag "github.com/spf13/pflag" +) + +func parseDBConv(args []string) (*DBConvConfig, error) { + f := flag.NewFlagSet("dbconv", flag.ContinueOnError) + dbconv.DBConvConfigAddOptions(f) +} + +func main() { + +} From a52d9acdf0dd0b488f7dc758aff09f831811e357 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 28 Dec 2023 00:26:06 +0000 Subject: [PATCH 02/58] make lint happy --- cmd/dbconv/dbconv/dbconv.go | 22 +--------------------- cmd/dbconv/main.go | 9 +++++++-- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index e0f8e5703e..a4e580463a 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "math" "sync" "github.com/ethereum/go-ethereum/core/rawdb" @@ -38,44 +37,25 @@ func (c *DBConverter) copyEntries(ctx context.Context, prefix []byte) error { it := c.src.NewIterator(prefix, nil) defer it.Release() batch := c.dst.NewBatch() - var lastKey []byte - var lastKeyInBatch []byte + // TODO support restarting in case of an interruption for it.Next() && ctx.Err() == nil { key, value := it.Key(), it.Value() if err := batch.Put(key, value); err != nil { return err } - lastKeyInBatch = key if batch.ValueSize() >= ethdb.IdealBatchSize { if err := batch.Write(); err != nil { return err } - lastKey = lastKeyInBatch batch.Reset() } } if err := batch.Write(); err != nil { return err } - lastKey = lastKeyInBatch - _ = lastKey // TODO add saving info about last written key and allow restarting in case of an interruption return nil } -func entriesAndShortestKey(db ethdb.Database) (entries uint, shortest int) { - shortest = math.MaxInt - it := db.NewIterator(nil, nil) - for it.Next() { - entries++ - key := it.Key() - if len(key) < shortest { - shortest = len(key) - } - } - it.Release() - return -} - func (c *DBConverter) Convert(ctx context.Context) error { var err error defer c.Close() diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index 7f21b3d4da..49dd4f2fdd 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -1,15 +1,20 @@ package main import ( + "os" + "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" flag "github.com/spf13/pflag" ) -func parseDBConv(args []string) (*DBConvConfig, error) { +func parseDBConv(args []string) (*dbconv.DBConvConfig, error) { f := flag.NewFlagSet("dbconv", flag.ContinueOnError) dbconv.DBConvConfigAddOptions(f) + // TODO + return nil, nil } func main() { - + args := os.Args[1:] + _, _ = parseDBConv(args) } From 67922b5c52f334410e39558a007ef0d264a5c72f Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 28 Dec 2023 01:44:51 +0000 Subject: [PATCH 03/58] add database conversion system test draft --- cmd/dbconv/dbconv/dbconv_test.go | 7 +-- system_tests/db_conversion_test.go | 99 ++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 4 deletions(-) create mode 100644 system_tests/db_conversion_test.go diff --git a/cmd/dbconv/dbconv/dbconv_test.go b/cmd/dbconv/dbconv/dbconv_test.go index 9054c49148..4fef71ac30 100644 --- a/cmd/dbconv/dbconv/dbconv_test.go +++ b/cmd/dbconv/dbconv/dbconv_test.go @@ -42,7 +42,6 @@ func TestConversion(t *testing.T) { config.Dst = newDBConfig config.Threads = 32 conv := NewDBConverter(&config) - defer conv.Close() ctx, cancel := context.WithCancel(context.Background()) defer cancel() err := conv.Convert(ctx) @@ -61,12 +60,12 @@ func TestConversion(t *testing.T) { defer it.Release() for it.Next() { if has, _ := newDb.Has(it.Key()); !has { - t.Log("Missing key in converted db, key:", it.Key()) + t.Log("Missing key in the converted db, key:", it.Key()) } newValue, err := newDb.Get(it.Key()) Require(t, err) if !bytes.Equal(newValue, it.Value()) { - Fail(t, "Value missmatch, old:", it.Value(), "new:", newValue) + Fail(t, "Value mismatch, old:", it.Value(), "new:", newValue) } } }() @@ -75,7 +74,7 @@ func TestConversion(t *testing.T) { defer it.Release() for it.Next() { if has, _ := oldDb.Has(it.Key()); !has { - t.Log("Unexpected key in converted db, key:", it.Key()) + Fail(t, "Unexpected key in the converted db, key:", it.Key()) } } }() diff --git a/system_tests/db_conversion_test.go b/system_tests/db_conversion_test.go new file mode 100644 index 0000000000..eca4509903 --- /dev/null +++ b/system_tests/db_conversion_test.go @@ -0,0 +1,99 @@ +package arbtest + +import ( + "context" + "os" + "path" + "path/filepath" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" +) + +func TestDatabaseConversion(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + var dataDir string + builder := NewNodeBuilder(ctx).DefaultConfig(t, true) + builder.l2StackConfig.DBEngine = "leveldb" + builder.l2StackConfig.Name = "testl2" + builder.execConfig.Caching.Archive = true + cleanup := builder.Build(t) + dataDir = builder.dataDir + cleanupDone := false + defer func() { + if !cleanupDone { // TODO we should be able to call cleanup twice, rn it gets stuck then + cleanup() + } + }() + builder.L2Info.GenerateAccount("User2") + var txs []*types.Transaction + for i := uint64(0); i < 200; i++ { + tx := builder.L2Info.PrepareTx("Owner", "User2", builder.L2Info.TransferGas, common.Big1, nil) + txs = append(txs, tx) + err := builder.L2.Client.SendTransaction(ctx, tx) + Require(t, err) + } + for _, tx := range txs { + _, err := builder.L2.EnsureTxSucceeded(tx) + Require(t, err) + } + cleanupDone = true + cleanup() + t.Log("stopped first node") + + instanceDir := filepath.Join(dataDir, builder.l2StackConfig.Name) + err := os.Rename(filepath.Join(instanceDir, "chaindb"), filepath.Join(instanceDir, "chaindb_old")) + Require(t, err) + t.Log("converting chaindb...") + func() { + oldDBConfig := dbconv.DBConfigDefault + oldDBConfig.Data = path.Join(instanceDir, "chaindb_old") + oldDBConfig.DBEngine = "leveldb" + newDBConfig := dbconv.DBConfigDefault + newDBConfig.Data = path.Join(instanceDir, "chaindb") + newDBConfig.DBEngine = "pebble" + convConfig := dbconv.DefaultDBConvConfig + convConfig.Src = oldDBConfig + convConfig.Dst = newDBConfig + convConfig.Threads = 32 + conv := dbconv.NewDBConverter(&convConfig) + err := conv.Convert(ctx) + Require(t, err) + }() + t.Log("converting arbitrumdata...") + err = os.Rename(filepath.Join(instanceDir, "arbitrumdata"), filepath.Join(instanceDir, "arbitrumdata_old")) + Require(t, err) + func() { + oldDBConfig := dbconv.DBConfigDefault + oldDBConfig.Data = path.Join(instanceDir, "arbitrumdata_old") + oldDBConfig.DBEngine = "leveldb" + newDBConfig := dbconv.DBConfigDefault + newDBConfig.Data = path.Join(instanceDir, "arbitrumdata") + newDBConfig.DBEngine = "pebble" + convConfig := dbconv.DefaultDBConvConfig + convConfig.Src = oldDBConfig + convConfig.Dst = newDBConfig + convConfig.Threads = 32 + conv := dbconv.NewDBConverter(&convConfig) + err := conv.Convert(ctx) + Require(t, err) + }() + + builder = NewNodeBuilder(ctx).DefaultConfig(t, true) + builder.l2StackConfig.Name = "testl2" + builder.l2StackConfig.DBEngine = "pebble" + builder.dataDir = dataDir + cleanup = builder.Build(t) + defer cleanup() + + builder.L2Info.GenerateAccount("User2") + t.Log("sending test tx") + tx := builder.L2Info.PrepareTx("Owner", "User2", builder.L2Info.TransferGas, common.Big1, nil) + err = builder.L2.Client.SendTransaction(ctx, tx) + Require(t, err) + _, err = builder.L2.EnsureTxSucceeded(tx) + Require(t, err) +} From 5f61bc3ad28014c311e05367f2ac755a5033e3a7 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 28 Dec 2023 05:33:08 +0000 Subject: [PATCH 04/58] improve db conversion test --- system_tests/db_conversion_test.go | 104 +++++++++++++++-------------- 1 file changed, 53 insertions(+), 51 deletions(-) diff --git a/system_tests/db_conversion_test.go b/system_tests/db_conversion_test.go index eca4509903..0f20163f65 100644 --- a/system_tests/db_conversion_test.go +++ b/system_tests/db_conversion_test.go @@ -2,6 +2,7 @@ package arbtest import ( "context" + "fmt" "os" "path" "path/filepath" @@ -9,24 +10,25 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/trie" "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" ) func TestDatabaseConversion(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - var dataDir string builder := NewNodeBuilder(ctx).DefaultConfig(t, true) builder.l2StackConfig.DBEngine = "leveldb" builder.l2StackConfig.Name = "testl2" builder.execConfig.Caching.Archive = true - cleanup := builder.Build(t) - dataDir = builder.dataDir - cleanupDone := false - defer func() { - if !cleanupDone { // TODO we should be able to call cleanup twice, rn it gets stuck then - cleanup() + _ = builder.Build(t) + dataDir := builder.dataDir + l2CleanupDone := false + defer func() { // TODO we should be able to call cleanup twice, rn it gets stuck then + if !l2CleanupDone { + builder.L2.cleanup() } + builder.L1.cleanup() }() builder.L2Info.GenerateAccount("User2") var txs []*types.Transaction @@ -40,60 +42,60 @@ func TestDatabaseConversion(t *testing.T) { _, err := builder.L2.EnsureTxSucceeded(tx) Require(t, err) } - cleanupDone = true - cleanup() + l2CleanupDone = true + builder.L2.cleanup() + bc := builder.L2.ExecNode.Backend.ArbInterface().BlockChain() t.Log("stopped first node") instanceDir := filepath.Join(dataDir, builder.l2StackConfig.Name) - err := os.Rename(filepath.Join(instanceDir, "chaindb"), filepath.Join(instanceDir, "chaindb_old")) - Require(t, err) - t.Log("converting chaindb...") - func() { - oldDBConfig := dbconv.DBConfigDefault - oldDBConfig.Data = path.Join(instanceDir, "chaindb_old") - oldDBConfig.DBEngine = "leveldb" - newDBConfig := dbconv.DBConfigDefault - newDBConfig.Data = path.Join(instanceDir, "chaindb") - newDBConfig.DBEngine = "pebble" - convConfig := dbconv.DefaultDBConvConfig - convConfig.Src = oldDBConfig - convConfig.Dst = newDBConfig - convConfig.Threads = 32 - conv := dbconv.NewDBConverter(&convConfig) - err := conv.Convert(ctx) - Require(t, err) - }() - t.Log("converting arbitrumdata...") - err = os.Rename(filepath.Join(instanceDir, "arbitrumdata"), filepath.Join(instanceDir, "arbitrumdata_old")) - Require(t, err) - func() { - oldDBConfig := dbconv.DBConfigDefault - oldDBConfig.Data = path.Join(instanceDir, "arbitrumdata_old") - oldDBConfig.DBEngine = "leveldb" - newDBConfig := dbconv.DBConfigDefault - newDBConfig.Data = path.Join(instanceDir, "arbitrumdata") - newDBConfig.DBEngine = "pebble" - convConfig := dbconv.DefaultDBConvConfig - convConfig.Src = oldDBConfig - convConfig.Dst = newDBConfig - convConfig.Threads = 32 - conv := dbconv.NewDBConverter(&convConfig) - err := conv.Convert(ctx) + for _, dbname := range []string{"chaindb", "arbitrumdata"} { + err := os.Rename(filepath.Join(instanceDir, dbname), filepath.Join(instanceDir, fmt.Sprintf("%s_old", dbname))) Require(t, err) - }() + t.Log("converting:", dbname) + func() { + oldDBConfig := dbconv.DBConfigDefault + oldDBConfig.Data = path.Join(instanceDir, fmt.Sprintf("%s_old", dbname)) + oldDBConfig.DBEngine = "leveldb" + newDBConfig := dbconv.DBConfigDefault + newDBConfig.Data = path.Join(instanceDir, dbname) + newDBConfig.DBEngine = "pebble" + convConfig := dbconv.DefaultDBConvConfig + convConfig.Src = oldDBConfig + convConfig.Dst = newDBConfig + convConfig.Threads = 32 + conv := dbconv.NewDBConverter(&convConfig) + err := conv.Convert(ctx) + Require(t, err) + }() + } - builder = NewNodeBuilder(ctx).DefaultConfig(t, true) - builder.l2StackConfig.Name = "testl2" builder.l2StackConfig.DBEngine = "pebble" - builder.dataDir = dataDir - cleanup = builder.Build(t) + testClient, cleanup := builder.Build2ndNode(t, &SecondNodeParams{stackConfig: builder.l2StackConfig}) defer cleanup() - builder.L2Info.GenerateAccount("User2") t.Log("sending test tx") tx := builder.L2Info.PrepareTx("Owner", "User2", builder.L2Info.TransferGas, common.Big1, nil) - err = builder.L2.Client.SendTransaction(ctx, tx) + err := testClient.Client.SendTransaction(ctx, tx) Require(t, err) - _, err = builder.L2.EnsureTxSucceeded(tx) + _, err = testClient.EnsureTxSucceeded(tx) Require(t, err) + + bc = testClient.ExecNode.Backend.ArbInterface().BlockChain() + current := bc.CurrentBlock() + if current == nil { + Fatal(t, "failed to get current block header") + } + triedb := bc.StateCache().TrieDB() + for i := uint64(0); i <= current.Number.Uint64(); i++ { + header := bc.GetHeaderByNumber(i) + _, err := bc.StateAt(header.Root) + Require(t, err) + tr, err := trie.New(trie.TrieID(header.Root), triedb) + Require(t, err) + it, err := tr.NodeIterator(nil) + Require(t, err) + for it.Next(true) { + } + Require(t, it.Error()) + } } From 689e653a3616b27e6c30c8944aa5591da4ee5f5a Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Sat, 30 Dec 2023 14:08:10 +0000 Subject: [PATCH 05/58] use start key instead of prefix, add main draft --- cmd/dbconv/dbconv/config.go | 15 ++-- cmd/dbconv/dbconv/dbconv.go | 118 +++++++++++++++++++++++------ cmd/dbconv/dbconv/dbconv_test.go | 32 +++++++- cmd/dbconv/main.go | 30 +++++++- system_tests/db_conversion_test.go | 3 + 5 files changed, 165 insertions(+), 33 deletions(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index 9a0a79c10b..0d2e61710f 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -1,6 +1,7 @@ package dbconv import ( + "github.com/ethereum/go-ethereum/ethdb" flag "github.com/spf13/pflag" ) @@ -23,15 +24,17 @@ func DBConfigAddOptions(prefix string, f *flag.FlagSet) { } type DBConvConfig struct { - Src DBConfig `koanf:"src"` - Dst DBConfig `koanf:"dst"` - Threads uint8 `koanf:"threads"` + Src DBConfig `koanf:"src"` + Dst DBConfig `koanf:"dst"` + Threads uint8 `koanf:"threads"` + IdealBatchSize int `koanf:"ideal-batch"` } -var DefaultDBConvConfig = DBConvConfig{} +var DefaultDBConvConfig = DBConvConfig{IdealBatchSize: ethdb.IdealBatchSize} func DBConvConfigAddOptions(f *flag.FlagSet) { - DBConfigAddOptions(".src", f) - DBConfigAddOptions(".dst", f) + DBConfigAddOptions("src", f) + DBConfigAddOptions("dst", f) f.Uint8("threads", DefaultDBConvConfig.Threads, "number of threads to use (1-255, 0 = auto)") + f.Uint8("ideal-batch", DefaultDBConvConfig.Threads, "ideal write batch size") // TODO } diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index a4e580463a..11760d8110 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -1,13 +1,16 @@ package dbconv import ( + "bytes" "context" "errors" "fmt" + "math/big" "sync" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" ) type DBConverter struct { @@ -33,27 +36,106 @@ func openDB(config *DBConfig, readonly bool) (ethdb.Database, error) { }) } -func (c *DBConverter) copyEntries(ctx context.Context, prefix []byte) error { - it := c.src.NewIterator(prefix, nil) +func middleKey(start []byte, end []byte) []byte { + if len(end) == 0 { + end = make([]byte, len(start)) + for i := range end { + end[i] = 0xff + } + } + if len(start) > len(end) { + tmp := make([]byte, len(start)) + copy(tmp, end) + end = tmp + } else if len(start) < len(end) { + tmp := make([]byte, len(end)) + copy(tmp, start) + start = tmp + } + s := new(big.Int).SetBytes(start) + e := new(big.Int).SetBytes(end) + sum := new(big.Int).Add(s, e) + var m big.Int + var mid []byte + if sum.Bit(0) == 1 { + m.Lsh(sum, 7) + mid = make([]byte, len(start)+1) + } else { + m.Rsh(sum, 1) + mid = make([]byte, len(start)) + } + m.FillBytes(mid) + return mid +} + +func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, wg *sync.WaitGroup, results chan error) { + log.Debug("copyEntries", "start", start, "end", end) + it := c.src.NewIterator(nil, start) defer it.Release() + var err error + defer func() { + results <- err + }() + batch := c.dst.NewBatch() // TODO support restarting in case of an interruption + n := 0 + canFork := true for it.Next() && ctx.Err() == nil { - key, value := it.Key(), it.Value() - if err := batch.Put(key, value); err != nil { - return err + key := it.Key() + n++ + if n%10000 == 1 { + log.Debug("entry", "start", start, "end", end, "n", n, "len(key)", len(key)) } - if batch.ValueSize() >= ethdb.IdealBatchSize { - if err := batch.Write(); err != nil { - return err + if len(end) > 0 && bytes.Compare(key, end) >= 0 { + break + } + if err = batch.Put(key, it.Value()); err != nil { + return + } + if batch.ValueSize() >= c.config.IdealBatchSize { + if err = batch.Write(); err != nil { + return } batch.Reset() + if canFork { + select { + case err = <-results: + if err != nil { + return + } + if err = ctx.Err(); err != nil { + return + } + middle := middleKey(key, end) + if bytes.Compare(middle, key) > 0 && (len(end) == 0 || bytes.Compare(middle, end) < 0) { + // find next existing key after the middle to prevent the keys from growing too long + m := c.src.NewIterator(nil, middle) + if m.Next() { + middle = m.Key() + wg.Add(1) + go c.copyEntries(ctx, middle, end, wg, results) + } else { + results <- nil + } + end = middle + m.Release() + } else { + log.Warn("no more forking", "key", key, "middle", middle, "end", end) + canFork = false + results <- nil + } + default: + } + } } } - if err := batch.Write(); err != nil { - return err + if err = ctx.Err(); err == nil { + err = batch.Write() } - return nil + log.Info("copyEntries done", "start", start, "end", end, "n", n) + wg.Done() + return } func (c *DBConverter) Convert(ctx context.Context) error { @@ -88,18 +170,8 @@ func (c *DBConverter) Convert(ctx context.Context) error { results <- nil } var wg sync.WaitGroup - for i := 0; ctx.Err() == nil && i <= 0xff; i++ { - err = <-results - if err != nil { - return err - } - prefix := []byte{byte(i)} // TODO make better prefixes, for now we are assuming that majority of keys are 32 byte hashes representing legacy trie nodes - wg.Add(1) - go func() { - results <- c.copyEntries(ctx, prefix) - wg.Done() - }() - } + wg.Add(1) + go c.copyEntries(ctx, []byte{0}, nil, &wg, results) wg.Wait() drainLoop: for { diff --git a/cmd/dbconv/dbconv/dbconv_test.go b/cmd/dbconv/dbconv/dbconv_test.go index 4fef71ac30..e0e841bff2 100644 --- a/cmd/dbconv/dbconv/dbconv_test.go +++ b/cmd/dbconv/dbconv/dbconv_test.go @@ -3,12 +3,35 @@ package dbconv import ( "bytes" "context" + "math/rand" "testing" "github.com/ethereum/go-ethereum/log" "github.com/offchainlabs/nitro/util/testhelpers" ) +func TestMiddleKey(t *testing.T) { + triples := [][]byte{ + {0}, {0, 0}, {0, 0}, + {1}, {1, 1}, {1, 0, 128}, + {1}, {1, 0}, {1, 0}, + {1, 1}, {2}, {1, 128, 128}, + {1}, {2}, {1, 128}, + {1}, {2, 1}, {1, 128, 128}, + {0}, {255}, {127, 128}, + {0}, {}, {127, 128}, + {0, 0}, {}, {127, 255, 128}, + } + + for i := 0; i < len(triples)-2; i += 3 { + start, end, expected := triples[i], triples[i+1], triples[i+2] + if mid := middleKey(start, end); !bytes.Equal(mid, expected) { + Fail(t, "Unexpected result for start:", start, "end:", end, "want:", expected, "have:", mid) + } + } + +} + func TestConversion(t *testing.T) { _ = testhelpers.InitTestLog(t, log.LvlTrace) oldDBConfig := DBConfigDefault @@ -35,12 +58,19 @@ func TestConversion(t *testing.T) { } err = oldDb.Put([]byte{}, []byte{0xde, 0xed, 0xbe, 0xef}) Require(t, err) + for i := 0; i < 10000; i++ { + size := 1 + rand.Uint64()%100 + randomBytes := testhelpers.RandomizeSlice(make([]byte, size)) + err = oldDb.Put(randomBytes, []byte{byte(i)}) + Require(t, err) + } }() config := DefaultDBConvConfig config.Src = oldDBConfig config.Dst = newDBConfig - config.Threads = 32 + config.Threads = 2 + config.IdealBatchSize = 100 conv := NewDBConverter(&config) ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index 49dd4f2fdd..bcdebb1960 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -1,20 +1,44 @@ package main import ( + "context" "os" + "github.com/ethereum/go-ethereum/log" "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" + "github.com/offchainlabs/nitro/cmd/genericconf" + "github.com/offchainlabs/nitro/cmd/util/confighelpers" flag "github.com/spf13/pflag" ) func parseDBConv(args []string) (*dbconv.DBConvConfig, error) { f := flag.NewFlagSet("dbconv", flag.ContinueOnError) dbconv.DBConvConfigAddOptions(f) - // TODO - return nil, nil + k, err := confighelpers.BeginCommonParse(f, args) + if err != nil { + return nil, err + } + var config dbconv.DBConvConfig + if err := confighelpers.EndCommonParse(k, &config); err != nil { + return nil, err + } + return &config, nil } func main() { args := os.Args[1:] - _, _ = parseDBConv(args) + config, err := parseDBConv(args) + if err != nil { + panic(err) + } + err = genericconf.InitLog("plaintext", log.LvlDebug, &genericconf.FileLoggingConfig{Enable: false}, nil) + if err != nil { + panic(err) + } + + conv := dbconv.NewDBConverter(config) + err = conv.Convert(context.Background()) + if err != nil { + panic(err) + } } diff --git a/system_tests/db_conversion_test.go b/system_tests/db_conversion_test.go index 0f20163f65..2e2348ccc4 100644 --- a/system_tests/db_conversion_test.go +++ b/system_tests/db_conversion_test.go @@ -86,6 +86,7 @@ func TestDatabaseConversion(t *testing.T) { Fatal(t, "failed to get current block header") } triedb := bc.StateCache().TrieDB() + visited := 0 for i := uint64(0); i <= current.Number.Uint64(); i++ { header := bc.GetHeaderByNumber(i) _, err := bc.StateAt(header.Root) @@ -95,7 +96,9 @@ func TestDatabaseConversion(t *testing.T) { it, err := tr.NodeIterator(nil) Require(t, err) for it.Next(true) { + visited++ } Require(t, it.Error()) } + t.Log("visited nodes:", visited) } From 83a22e1eb9036b10bbd6d922ad6307b27fd92dc0 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 3 Jan 2024 20:41:00 +0000 Subject: [PATCH 06/58] fix middle key lookup --- cmd/dbconv/dbconv/config.go | 20 ++++++---- cmd/dbconv/dbconv/dbconv.go | 68 ++++++++++++++++++-------------- cmd/dbconv/dbconv/dbconv_test.go | 9 ++++- 3 files changed, 60 insertions(+), 37 deletions(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index 0d2e61710f..c57916c216 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -24,17 +24,23 @@ func DBConfigAddOptions(prefix string, f *flag.FlagSet) { } type DBConvConfig struct { - Src DBConfig `koanf:"src"` - Dst DBConfig `koanf:"dst"` - Threads uint8 `koanf:"threads"` - IdealBatchSize int `koanf:"ideal-batch"` + Src DBConfig `koanf:"src"` + Dst DBConfig `koanf:"dst"` + Threads int `koanf:"threads"` + IdealBatchSize int `koanf:"ideal-batch"` + MinBatchesBeforeFork int `koanf:"min-batches-before-fork"` } -var DefaultDBConvConfig = DBConvConfig{IdealBatchSize: ethdb.IdealBatchSize} +var DefaultDBConvConfig = DBConvConfig{ + IdealBatchSize: ethdb.IdealBatchSize, + MinBatchesBeforeFork: 100, + Threads: 0, +} func DBConvConfigAddOptions(f *flag.FlagSet) { DBConfigAddOptions("src", f) DBConfigAddOptions("dst", f) - f.Uint8("threads", DefaultDBConvConfig.Threads, "number of threads to use (1-255, 0 = auto)") - f.Uint8("ideal-batch", DefaultDBConvConfig.Threads, "ideal write batch size") // TODO + f.Int("threads", DefaultDBConvConfig.Threads, "number of threads to use (0 = auto)") + f.Int("ideal-batch", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size") // TODO + f.Int("min-batches-before-fork", DefaultDBConvConfig.MinBatchesBeforeFork, "minimal number of batches before forking a thread") // TODO } diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index 11760d8110..17adaea9db 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -69,7 +69,7 @@ func middleKey(start []byte, end []byte) []byte { } func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, wg *sync.WaitGroup, results chan error) { - log.Debug("copyEntries", "start", start, "end", end) + log.Debug("copy entries", "start", start, "end", end) it := c.src.NewIterator(nil, start) defer it.Release() var err error @@ -80,12 +80,14 @@ func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, batch := c.dst.NewBatch() // TODO support restarting in case of an interruption n := 0 + f := 0 canFork := true + batchesSinceLastFork := 0 for it.Next() && ctx.Err() == nil { key := it.Key() n++ if n%10000 == 1 { - log.Debug("entry", "start", start, "end", end, "n", n, "len(key)", len(key)) + log.Debug("progress", "start", start, "end", end, "n", n, "forked", f) } if len(end) > 0 && bytes.Compare(key, end) >= 0 { break @@ -98,42 +100,49 @@ func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, return } batch.Reset() - if canFork { - select { - case err = <-results: - if err != nil { - return - } - if err = ctx.Err(); err != nil { - return - } - middle := middleKey(key, end) - if bytes.Compare(middle, key) > 0 && (len(end) == 0 || bytes.Compare(middle, end) < 0) { - // find next existing key after the middle to prevent the keys from growing too long - m := c.src.NewIterator(nil, middle) - if m.Next() { - middle = m.Key() + batchesSinceLastFork++ + } + if canFork && batchesSinceLastFork >= c.config.MinBatchesBeforeFork { + select { + case err = <-results: + if err != nil { + return + } + if err = ctx.Err(); err != nil { + return + } + middle := middleKey(key, end) + if bytes.Compare(middle, key) > 0 && (len(end) == 0 || bytes.Compare(middle, end) < 0) { + // find next existing key after the middle to prevent the keys from growing too long + m := c.src.NewIterator(nil, middle) + if m.Next() { + foundMiddle := m.Key() + if len(end) == 0 || bytes.Compare(foundMiddle, end) < 0 { wg.Add(1) - go c.copyEntries(ctx, middle, end, wg, results) + go c.copyEntries(ctx, foundMiddle, end, wg, results) + middle = foundMiddle + f++ } else { + // no entries either after the middle key or for the middle key results <- nil } - end = middle - m.Release() - } else { - log.Warn("no more forking", "key", key, "middle", middle, "end", end) - canFork = false - results <- nil } - default: + end = middle + m.Release() + batchesSinceLastFork = 0 + } else { + log.Warn("no more forking", "key", key, "middle", middle, "end", end) + canFork = false + results <- nil } + default: } } } if err = ctx.Err(); err == nil { err = batch.Write() } - log.Info("copyEntries done", "start", start, "end", end, "n", n) + log.Info("copy entries done", "start", start, "end", end, "n", n, "forked", f) wg.Done() return } @@ -149,8 +158,9 @@ func (c *DBConverter) Convert(ctx context.Context) error { if err != nil { return err } - if c.config.Threads == uint8(0) { - return errors.New("threads count can't be 0") + // TODO + if c.config.Threads <= 0 { + return errors.New("invalid threads count") } // copy empty key entry @@ -166,7 +176,7 @@ func (c *DBConverter) Convert(ctx context.Context) error { } results := make(chan error, c.config.Threads) - for i := uint8(0); i < c.config.Threads; i++ { + for i := 0; i < c.config.Threads-1; i++ { results <- nil } var wg sync.WaitGroup diff --git a/cmd/dbconv/dbconv/dbconv_test.go b/cmd/dbconv/dbconv/dbconv_test.go index e0e841bff2..53096602e0 100644 --- a/cmd/dbconv/dbconv/dbconv_test.go +++ b/cmd/dbconv/dbconv/dbconv_test.go @@ -30,6 +30,12 @@ func TestMiddleKey(t *testing.T) { } } + // for i := 0; i < 1000; i++ { + // for j := 0; j < 1000; j++ { + // start := new(big.Int.) + // m := moddleKey({i}, {j}) + // } + // } } func TestConversion(t *testing.T) { @@ -69,8 +75,9 @@ func TestConversion(t *testing.T) { config := DefaultDBConvConfig config.Src = oldDBConfig config.Dst = newDBConfig - config.Threads = 2 + config.Threads = 512 config.IdealBatchSize = 100 + config.MinBatchesBeforeFork = 10 conv := NewDBConverter(&config) ctx, cancel := context.WithCancel(context.Background()) defer cancel() From a1ea3aee2c7d50a65e6cd0f01d8275b27f08d67d Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 3 Jan 2024 20:44:11 +0000 Subject: [PATCH 07/58] fix lint --- cmd/dbconv/dbconv/config.go | 4 ++-- cmd/dbconv/dbconv/dbconv.go | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index c57916c216..abac696b9c 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -27,7 +27,7 @@ type DBConvConfig struct { Src DBConfig `koanf:"src"` Dst DBConfig `koanf:"dst"` Threads int `koanf:"threads"` - IdealBatchSize int `koanf:"ideal-batch"` + IdealBatchSize int `koanf:"ideal-batch-size"` MinBatchesBeforeFork int `koanf:"min-batches-before-fork"` } @@ -41,6 +41,6 @@ func DBConvConfigAddOptions(f *flag.FlagSet) { DBConfigAddOptions("src", f) DBConfigAddOptions("dst", f) f.Int("threads", DefaultDBConvConfig.Threads, "number of threads to use (0 = auto)") - f.Int("ideal-batch", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size") // TODO + f.Int("ideal-batch-size", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size") // TODO f.Int("min-batches-before-fork", DefaultDBConvConfig.MinBatchesBeforeFork, "minimal number of batches before forking a thread") // TODO } diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index 17adaea9db..d15b67f2b1 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -144,7 +144,6 @@ func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, } log.Info("copy entries done", "start", start, "end", end, "n", n, "forked", f) wg.Done() - return } func (c *DBConverter) Convert(ctx context.Context) error { From d14a201d8140b91851f02488aa2aded2ce980b18 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 4 Jan 2024 15:40:30 +0000 Subject: [PATCH 08/58] add initial progress reporting --- cmd/dbconv/dbconv/dbconv.go | 27 +++++++--- cmd/dbconv/dbconv/dbconv_test.go | 29 ++++++---- cmd/dbconv/dbconv/stats.go | 93 ++++++++++++++++++++++++++++++++ cmd/dbconv/main.go | 19 ++++++- 4 files changed, 152 insertions(+), 16 deletions(-) create mode 100644 cmd/dbconv/dbconv/stats.go diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index d15b67f2b1..95113b7117 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -15,13 +15,16 @@ import ( type DBConverter struct { config *DBConvConfig + stats Stats src ethdb.Database dst ethdb.Database } func NewDBConverter(config *DBConvConfig) *DBConverter { - return &DBConverter{config: config} + return &DBConverter{ + config: config, + } } func openDB(config *DBConfig, readonly bool) (ethdb.Database, error) { @@ -82,23 +85,25 @@ func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, n := 0 f := 0 canFork := true + entriesInBatch := 0 batchesSinceLastFork := 0 for it.Next() && ctx.Err() == nil { key := it.Key() n++ - if n%10000 == 1 { - log.Debug("progress", "start", start, "end", end, "n", n, "forked", f) - } if len(end) > 0 && bytes.Compare(key, end) >= 0 { break } if err = batch.Put(key, it.Value()); err != nil { return } - if batch.ValueSize() >= c.config.IdealBatchSize { + entriesInBatch++ + if batchSize := batch.ValueSize(); batchSize >= c.config.IdealBatchSize { if err = batch.Write(); err != nil { return } + c.stats.AddEntries(int64(entriesInBatch)) + c.stats.AddBytes(int64(batchSize)) + entriesInBatch = 0 batch.Reset() batchesSinceLastFork++ } @@ -140,7 +145,10 @@ func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, } } if err = ctx.Err(); err == nil { + batchSize := batch.ValueSize() err = batch.Write() + c.stats.AddEntries(int64(entriesInBatch)) + c.stats.AddBytes(int64(batchSize)) } log.Info("copy entries done", "start", start, "end", end, "n", n, "forked", f) wg.Done() @@ -162,6 +170,8 @@ func (c *DBConverter) Convert(ctx context.Context) error { return errors.New("invalid threads count") } + c.stats.Reset() + // copy empty key entry if has, _ := c.src.Has([]byte{}); has { value, err := c.src.Get([]byte{}) @@ -172,8 +182,9 @@ func (c *DBConverter) Convert(ctx context.Context) error { if err != nil { return fmt.Errorf("Destination database: failed to put value for an empty key: %w", err) } + c.stats.AddEntries(1) + c.stats.AddBytes(int64(len(value))) // adding only value len as key is empty } - results := make(chan error, c.config.Threads) for i := 0; i < c.config.Threads-1; i++ { results <- nil @@ -196,6 +207,10 @@ drainLoop: return nil } +func (c *DBConverter) Stats() *Stats { + return &c.stats +} + func (c *DBConverter) Close() { if c.src != nil { c.src.Close() diff --git a/cmd/dbconv/dbconv/dbconv_test.go b/cmd/dbconv/dbconv/dbconv_test.go index 53096602e0..66662e02ac 100644 --- a/cmd/dbconv/dbconv/dbconv_test.go +++ b/cmd/dbconv/dbconv/dbconv_test.go @@ -3,6 +3,7 @@ package dbconv import ( "bytes" "context" + "math/big" "math/rand" "testing" @@ -15,14 +16,13 @@ func TestMiddleKey(t *testing.T) { {0}, {0, 0}, {0, 0}, {1}, {1, 1}, {1, 0, 128}, {1}, {1, 0}, {1, 0}, - {1, 1}, {2}, {1, 128, 128}, {1}, {2}, {1, 128}, {1}, {2, 1}, {1, 128, 128}, {0}, {255}, {127, 128}, {0}, {}, {127, 128}, {0, 0}, {}, {127, 255, 128}, + {1, 1}, {2}, {1, 128, 128}, } - for i := 0; i < len(triples)-2; i += 3 { start, end, expected := triples[i], triples[i+1], triples[i+2] if mid := middleKey(start, end); !bytes.Equal(mid, expected) { @@ -30,12 +30,23 @@ func TestMiddleKey(t *testing.T) { } } - // for i := 0; i < 1000; i++ { - // for j := 0; j < 1000; j++ { - // start := new(big.Int.) - // m := moddleKey({i}, {j}) - // } - // } + for i := int64(0); i < 1000-1; i++ { + for j := int64(0); j < 1000; j++ { + start := big.NewInt(i).Bytes() + end := big.NewInt(j).Bytes() + if bytes.Compare(start, end) > 0 { + start, end = end, start + } + middle := middleKey(start, end) + if bytes.Compare(middle, start) < 0 { + Fail(t, "middle < start, start:", start, "end:", end, "middle:", middle) + } + if bytes.Compare(middle, end) > 0 { + Fail(t, "middle > end, start:", start, "end:", end, "middle:", middle) + } + } + } + } func TestConversion(t *testing.T) { @@ -64,7 +75,7 @@ func TestConversion(t *testing.T) { } err = oldDb.Put([]byte{}, []byte{0xde, 0xed, 0xbe, 0xef}) Require(t, err) - for i := 0; i < 10000; i++ { + for i := 0; i < 100000; i++ { size := 1 + rand.Uint64()%100 randomBytes := testhelpers.RandomizeSlice(make([]byte, size)) err = oldDb.Put(randomBytes, []byte{byte(i)}) diff --git a/cmd/dbconv/dbconv/stats.go b/cmd/dbconv/dbconv/stats.go new file mode 100644 index 0000000000..33e99d81af --- /dev/null +++ b/cmd/dbconv/dbconv/stats.go @@ -0,0 +1,93 @@ +package dbconv + +import ( + "sync/atomic" + "time" + + "github.com/ethereum/go-ethereum/log" +) + +type Stats struct { + entries atomic.Int64 + bytes atomic.Int64 + + startTimestamp int64 + prevEntires int64 + prevBytes int64 + prevEntiresTimestamp int64 + prevBytesTimestamp int64 +} + +func (s *Stats) Reset() { + now := time.Now().UnixNano() + s.entries.Store(0) + s.bytes.Store(0) + s.startTimestamp = now + s.prevEntires = 0 + s.prevBytes = 0 + s.prevEntiresTimestamp = now + s.prevBytesTimestamp = now +} + +func (s *Stats) AddEntries(entries int64) { + s.entries.Add(entries) +} + +func (s *Stats) Entries() int64 { + return s.entries.Load() +} + +func (s *Stats) AddBytes(bytes int64) { + s.bytes.Add(bytes) +} + +func (s *Stats) Bytes() int64 { + return s.bytes.Load() +} + +// not thread safe vs itself +func (s *Stats) EntriesPerSecond() float64 { + now := time.Now().UnixNano() + current := s.Entries() + dt := now - s.prevEntiresTimestamp + if dt == 0 { + dt = 1 + } + de := current - s.prevEntires + s.prevEntires = current + s.prevEntiresTimestamp = now + return float64(de) * 1e9 / float64(dt) +} + +// not thread safe vs itself +func (s *Stats) BytesPerSecond() float64 { + now := time.Now().UnixNano() + current := s.Bytes() + dt := now - s.prevBytesTimestamp + if dt == 0 { + dt = 1 + } + db := current - s.prevBytes + s.prevBytes = current + s.prevBytesTimestamp = now + log.Debug("BytesPerSecond", "dt", dt, "current", current) + return float64(db) * 1e9 / float64(dt) +} + +func (s *Stats) AverageEntriesPerSecond() float64 { + now := time.Now().UnixNano() + dt := now - s.startTimestamp + if dt == 0 { + dt = 1 + } + return float64(s.Entries()) * 1e9 / float64(dt) +} + +func (s *Stats) AverageBytesPerSecond() float64 { + now := time.Now().UnixNano() + dt := now - s.startTimestamp + if dt == 0 { + dt = 1 + } + return float64(s.Bytes()) * 1e9 / float64(dt) +} diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index bcdebb1960..58656fb90d 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -3,6 +3,7 @@ package main import ( "context" "os" + "time" "github.com/ethereum/go-ethereum/log" "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" @@ -37,7 +38,23 @@ func main() { } conv := dbconv.NewDBConverter(config) - err = conv.Convert(context.Background()) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go func() { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + for { + select { + case <-ticker.C: + stats := conv.Stats() + log.Info("Progress", "entries", stats.Entries(), "entires/s", stats.EntriesPerSecond(), "MB", float64(stats.Bytes())/1024/1024, "MB/s", stats.BytesPerSecond()/1024/1024, "avg e/s", stats.AverageEntriesPerSecond(), "avg MB/s", stats.AverageBytesPerSecond()/1024/1024) + case <-ctx.Done(): + return + } + } + }() + + err = conv.Convert(ctx) if err != nil { panic(err) } From dd4ec96997803d7aa1cae26e752c066c4435d2ae Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 4 Jan 2024 15:52:28 +0000 Subject: [PATCH 09/58] remove debug log from stats --- cmd/dbconv/dbconv/stats.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/cmd/dbconv/dbconv/stats.go b/cmd/dbconv/dbconv/stats.go index 33e99d81af..4094e6b9ac 100644 --- a/cmd/dbconv/dbconv/stats.go +++ b/cmd/dbconv/dbconv/stats.go @@ -3,8 +3,6 @@ package dbconv import ( "sync/atomic" "time" - - "github.com/ethereum/go-ethereum/log" ) type Stats struct { @@ -70,7 +68,6 @@ func (s *Stats) BytesPerSecond() float64 { db := current - s.prevBytes s.prevBytes = current s.prevBytesTimestamp = now - log.Debug("BytesPerSecond", "dt", dt, "current", current) return float64(db) * 1e9 / float64(dt) } From c81e6c801840850d21854fd342c0107939424e16 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 4 Jan 2024 16:57:09 +0000 Subject: [PATCH 10/58] fix forking, add more stats --- cmd/dbconv/dbconv/config.go | 2 +- cmd/dbconv/dbconv/dbconv.go | 8 +++++++- cmd/dbconv/dbconv/stats.go | 29 +++++++++++++++++++++++++++++ cmd/dbconv/main.go | 5 ++++- 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index abac696b9c..3caa5947e8 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -33,7 +33,7 @@ type DBConvConfig struct { var DefaultDBConvConfig = DBConvConfig{ IdealBatchSize: ethdb.IdealBatchSize, - MinBatchesBeforeFork: 100, + MinBatchesBeforeFork: 10, Threads: 0, } diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index 95113b7117..a88392c583 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -73,6 +73,7 @@ func middleKey(start []byte, end []byte) []byte { func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, wg *sync.WaitGroup, results chan error) { log.Debug("copy entries", "start", start, "end", end) + c.stats.AddThread() it := c.src.NewIterator(nil, start) defer it.Release() var err error @@ -126,15 +127,19 @@ func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, wg.Add(1) go c.copyEntries(ctx, foundMiddle, end, wg, results) middle = foundMiddle + batchesSinceLastFork = 0 + c.stats.AddFork() f++ } else { // no entries either after the middle key or for the middle key results <- nil } + } else { + // no entries either after the middle key or for the middle key + results <- nil } end = middle m.Release() - batchesSinceLastFork = 0 } else { log.Warn("no more forking", "key", key, "middle", middle, "end", end) canFork = false @@ -151,6 +156,7 @@ func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, c.stats.AddBytes(int64(batchSize)) } log.Info("copy entries done", "start", start, "end", end, "n", n, "forked", f) + c.stats.DecThread() wg.Done() } diff --git a/cmd/dbconv/dbconv/stats.go b/cmd/dbconv/dbconv/stats.go index 4094e6b9ac..04e239a51f 100644 --- a/cmd/dbconv/dbconv/stats.go +++ b/cmd/dbconv/dbconv/stats.go @@ -8,6 +8,8 @@ import ( type Stats struct { entries atomic.Int64 bytes atomic.Int64 + forks atomic.Int64 + threads atomic.Int64 startTimestamp int64 prevEntires int64 @@ -20,6 +22,8 @@ func (s *Stats) Reset() { now := time.Now().UnixNano() s.entries.Store(0) s.bytes.Store(0) + s.forks.Store(0) + s.threads.Store(0) s.startTimestamp = now s.prevEntires = 0 s.prevBytes = 0 @@ -43,6 +47,31 @@ func (s *Stats) Bytes() int64 { return s.bytes.Load() } +func (s *Stats) AddFork() { + s.forks.Add(1) +} + +func (s *Stats) Forks() int64 { + return s.forks.Load() +} + +func (s *Stats) AddThread() { + s.threads.Add(1) +} +func (s *Stats) DecThread() { + s.threads.Add(-1) +} + +func (s *Stats) Threads() int64 { + return s.threads.Load() +} + +func (s *Stats) Elapsed() time.Duration { + now := time.Now().UnixNano() + dt := now - s.startTimestamp + return time.Duration(dt) +} + // not thread safe vs itself func (s *Stats) EntriesPerSecond() float64 { now := time.Now().UnixNano() diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index 58656fb90d..51702ab121 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -47,7 +47,8 @@ func main() { select { case <-ticker.C: stats := conv.Stats() - log.Info("Progress", "entries", stats.Entries(), "entires/s", stats.EntriesPerSecond(), "MB", float64(stats.Bytes())/1024/1024, "MB/s", stats.BytesPerSecond()/1024/1024, "avg e/s", stats.AverageEntriesPerSecond(), "avg MB/s", stats.AverageBytesPerSecond()/1024/1024) + log.Info("Progress:\n", "entries", stats.Entries(), "entires/s", stats.EntriesPerSecond(), "avg e/s", stats.AverageEntriesPerSecond(), "MB/s", float64(stats.Bytes())/1024/1024, "MB/s", stats.BytesPerSecond()/1024/1024, "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "forks", stats.Forks(), "threads", stats.Threads(), "elapsed", stats.Elapsed()) + case <-ctx.Done(): return } @@ -58,4 +59,6 @@ func main() { if err != nil { panic(err) } + stats := conv.Stats() + log.Info("Conversion finished.", "entries", stats.Entries(), "avg e/s", stats.AverageEntriesPerSecond(), "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "elapsed", stats.Elapsed()) } From 97ea9b78bfae84c6f682ac2212f20bf3ec1a48f9 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 4 Jan 2024 18:05:37 +0000 Subject: [PATCH 11/58] add verification option --- cmd/dbconv/dbconv/config.go | 23 +++++++++++++++++-- cmd/dbconv/dbconv/dbconv.go | 46 +++++++++++++++++++++++++++++++++++++ cmd/dbconv/main.go | 44 +++++++++++++++++++++++++++++------ 3 files changed, 104 insertions(+), 9 deletions(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index 3caa5947e8..19b5e14c3f 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -1,7 +1,9 @@ package dbconv import ( - "github.com/ethereum/go-ethereum/ethdb" + "fmt" + + "github.com/ethereum/go-ethereum/log" flag "github.com/spf13/pflag" ) @@ -29,12 +31,16 @@ type DBConvConfig struct { Threads int `koanf:"threads"` IdealBatchSize int `koanf:"ideal-batch-size"` MinBatchesBeforeFork int `koanf:"min-batches-before-fork"` + Verify int `koanf:"verify"` + VerifyOnly bool `koanf:"verify-only"` } var DefaultDBConvConfig = DBConvConfig{ - IdealBatchSize: ethdb.IdealBatchSize, + IdealBatchSize: 100 * 1024 * 1024, // 100 MB MinBatchesBeforeFork: 10, Threads: 0, + Verify: 1, + VerifyOnly: false, } func DBConvConfigAddOptions(f *flag.FlagSet) { @@ -43,4 +49,17 @@ func DBConvConfigAddOptions(f *flag.FlagSet) { f.Int("threads", DefaultDBConvConfig.Threads, "number of threads to use (0 = auto)") f.Int("ideal-batch-size", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size") // TODO f.Int("min-batches-before-fork", DefaultDBConvConfig.MinBatchesBeforeFork, "minimal number of batches before forking a thread") // TODO + f.Int("verify", DefaultDBConvConfig.Verify, "enables verification (0 = disabled, 1 = only keys, 2 = keys and values)") // TODO + f.Bool("verify-only", DefaultDBConvConfig.VerifyOnly, "skips conversion, runs verification only") // TODO +} + +func (c *DBConvConfig) Validate() error { + if c.Verify < 0 || c.Verify > 2 { + return fmt.Errorf("Invalid verify config value: %v", c.Verify) + } + if c.VerifyOnly && c.Verify == 0 { + log.Info("enabling keys verification as --verify-only flag is set") + c.Verify = 1 + } + return nil } diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index a88392c583..a7c8d826e2 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -213,6 +213,52 @@ drainLoop: return nil } +func (c *DBConverter) Verify(ctx context.Context) error { + if c.config.Verify == 1 { + log.Info("Starting quick verification - verifying only keys existence") + } else { + log.Info("Starting full verification - verifying keys and values") + } + var err error + defer c.Close() + c.src, err = openDB(&c.config.Src, true) + if err != nil { + return err + } + c.dst, err = openDB(&c.config.Dst, true) + if err != nil { + return err + } + + c.stats.Reset() + c.stats.AddThread() + it := c.src.NewIterator(nil, nil) + defer it.Release() + for it.Next() { + switch c.config.Verify { + case 1: + if has, err := c.dst.Has(it.Key()); !has { + return fmt.Errorf("Missing key in destination db, key: %v, err: %w", it.Key(), err) + } + c.stats.AddBytes(int64(len(it.Key()))) + case 2: + dstValue, err := c.dst.Get(it.Key()) + if err != nil { + return err + } + if !bytes.Equal(dstValue, it.Value()) { + return fmt.Errorf("Value mismatch for key: %v, src value: %v, dst value: %s", it.Key(), it.Value(), dstValue) + } + c.stats.AddBytes(int64(len(it.Key()) + len(dstValue))) + default: + return fmt.Errorf("Invalid verify config value: %v", c.config.Verify) + } + c.stats.AddEntries(1) + } + c.stats.DecThread() + return nil +} + func (c *DBConverter) Stats() *Stats { return &c.stats } diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index 51702ab121..0bb4d05cd2 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "fmt" "os" "time" @@ -26,20 +27,36 @@ func parseDBConv(args []string) (*dbconv.DBConvConfig, error) { return &config, nil } +func printSampleUsage(name string) { + fmt.Printf("Sample usage: %s [OPTIONS] \n\n", name) + fmt.Printf("Options:\n") + fmt.Printf(" --help\n") + fmt.Printf(" --src.db-engine \n") + fmt.Printf(" --src.data \n") + fmt.Printf(" --dst.db-engine \n") + fmt.Printf(" --dst.data \n") +} func main() { args := os.Args[1:] config, err := parseDBConv(args) if err != nil { - panic(err) + confighelpers.PrintErrorAndExit(err, printSampleUsage) + return } err = genericconf.InitLog("plaintext", log.LvlDebug, &genericconf.FileLoggingConfig{Enable: false}, nil) if err != nil { - panic(err) + log.Error("Failed to init logging", "err", err) + return } + if err = config.Validate(); err != nil { + log.Error("Invalid config", "err", err) + return + } conv := dbconv.NewDBConverter(config) ctx, cancel := context.WithCancel(context.Background()) defer cancel() + go func() { ticker := time.NewTicker(5 * time.Second) defer ticker.Stop() @@ -55,10 +72,23 @@ func main() { } }() - err = conv.Convert(ctx) - if err != nil { - panic(err) + if !config.VerifyOnly { + err = conv.Convert(ctx) + if err != nil { + log.Error("Conversion error", "err", err) + return + } + stats := conv.Stats() + log.Info("Conversion finished.", "entries", stats.Entries(), "avg e/s", stats.AverageEntriesPerSecond(), "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "elapsed", stats.Elapsed()) + } + + if config.Verify > 0 { + err = conv.Verify(ctx) + if err != nil { + log.Error("Verification error", "err", err) + return + } + stats := conv.Stats() + log.Info("Verification completed successfully.", "elapsed:", stats.Elapsed()) } - stats := conv.Stats() - log.Info("Conversion finished.", "entries", stats.Entries(), "avg e/s", stats.AverageEntriesPerSecond(), "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "elapsed", stats.Elapsed()) } From 1e7041efd9be931b0a1a623ed0ce1e644e218ece Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 4 Jan 2024 18:44:14 +0000 Subject: [PATCH 12/58] reformat progress string, add log-level option --- cmd/dbconv/dbconv/config.go | 10 ++++++++-- cmd/dbconv/dbconv/dbconv.go | 4 ++-- cmd/dbconv/main.go | 6 +++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index 19b5e14c3f..0fee6d3431 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -33,27 +33,33 @@ type DBConvConfig struct { MinBatchesBeforeFork int `koanf:"min-batches-before-fork"` Verify int `koanf:"verify"` VerifyOnly bool `koanf:"verify-only"` + LogLevel int `koanf:"log-level"` } var DefaultDBConvConfig = DBConvConfig{ IdealBatchSize: 100 * 1024 * 1024, // 100 MB MinBatchesBeforeFork: 10, - Threads: 0, + Threads: 1, Verify: 1, VerifyOnly: false, + LogLevel: int(log.LvlDebug), } func DBConvConfigAddOptions(f *flag.FlagSet) { DBConfigAddOptions("src", f) DBConfigAddOptions("dst", f) - f.Int("threads", DefaultDBConvConfig.Threads, "number of threads to use (0 = auto)") + f.Int("threads", DefaultDBConvConfig.Threads, "number of threads to use") f.Int("ideal-batch-size", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size") // TODO f.Int("min-batches-before-fork", DefaultDBConvConfig.MinBatchesBeforeFork, "minimal number of batches before forking a thread") // TODO f.Int("verify", DefaultDBConvConfig.Verify, "enables verification (0 = disabled, 1 = only keys, 2 = keys and values)") // TODO f.Bool("verify-only", DefaultDBConvConfig.VerifyOnly, "skips conversion, runs verification only") // TODO + f.Int("log-level", DefaultDBConvConfig.LogLevel, "log level (0 crit - 5 trace)") // TODO } func (c *DBConvConfig) Validate() error { + if c.Threads < 0 { + return fmt.Errorf("Invalid threads number: %v", c.Threads) + } if c.Verify < 0 || c.Verify > 2 { return fmt.Errorf("Invalid verify config value: %v", c.Verify) } diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index a7c8d826e2..8674b73a2e 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -72,7 +72,7 @@ func middleKey(start []byte, end []byte) []byte { } func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, wg *sync.WaitGroup, results chan error) { - log.Debug("copy entries", "start", start, "end", end) + log.Debug("new conversion worker", "start", start, "end", end) c.stats.AddThread() it := c.src.NewIterator(nil, start) defer it.Release() @@ -155,7 +155,7 @@ func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, c.stats.AddEntries(int64(entriesInBatch)) c.stats.AddBytes(int64(batchSize)) } - log.Info("copy entries done", "start", start, "end", end, "n", n, "forked", f) + log.Debug("worker done", "start", start, "end", end, "n", n, "forked", f) c.stats.DecThread() wg.Done() } diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index 0bb4d05cd2..c0f81eed4b 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -43,7 +43,7 @@ func main() { confighelpers.PrintErrorAndExit(err, printSampleUsage) return } - err = genericconf.InitLog("plaintext", log.LvlDebug, &genericconf.FileLoggingConfig{Enable: false}, nil) + err = genericconf.InitLog("plaintext", log.Lvl(config.LogLevel), &genericconf.FileLoggingConfig{Enable: false}, nil) if err != nil { log.Error("Failed to init logging", "err", err) return @@ -64,7 +64,7 @@ func main() { select { case <-ticker.C: stats := conv.Stats() - log.Info("Progress:\n", "entries", stats.Entries(), "entires/s", stats.EntriesPerSecond(), "avg e/s", stats.AverageEntriesPerSecond(), "MB/s", float64(stats.Bytes())/1024/1024, "MB/s", stats.BytesPerSecond()/1024/1024, "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "forks", stats.Forks(), "threads", stats.Threads(), "elapsed", stats.Elapsed()) + fmt.Printf("Progress:\n\tprocessed entries: %v\n\tprocessed data (MB): %v\n\telapsed: %v\n\tcurrent:\tentr/s: %v\tMB/s: %v\n\taverage:\tentr/s: %v\tMB/s: %v\n\tthreads: %v\tforks: %v\n", stats.Entries(), stats.Bytes()/1024/1024, stats.Elapsed(), stats.EntriesPerSecond(), stats.BytesPerSecond()/1024/1024, stats.AverageEntriesPerSecond(), stats.AverageBytesPerSecond()/1024/1024, stats.Threads(), stats.Forks()) case <-ctx.Done(): return @@ -79,7 +79,7 @@ func main() { return } stats := conv.Stats() - log.Info("Conversion finished.", "entries", stats.Entries(), "avg e/s", stats.AverageEntriesPerSecond(), "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "elapsed", stats.Elapsed()) + log.Info("Conversion finished.", "entries", stats.Entries(), "MB", stats.Bytes()/1024/1024, "avg e/s", stats.AverageEntriesPerSecond(), "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "elapsed", stats.Elapsed()) } if config.Verify > 0 { From c735874e56d61504efb53a77c1a284aab63f2a09 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 4 Jan 2024 19:16:21 +0000 Subject: [PATCH 13/58] add compaction option --- cmd/dbconv/dbconv/config.go | 25 ++++++++++++++----------- cmd/dbconv/dbconv/dbconv.go | 21 +++++++++++++++++++-- cmd/dbconv/main.go | 23 ++++++++++++++--------- 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index 0fee6d3431..ca936349e9 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -1,6 +1,7 @@ package dbconv import ( + "errors" "fmt" "github.com/ethereum/go-ethereum/log" @@ -31,8 +32,9 @@ type DBConvConfig struct { Threads int `koanf:"threads"` IdealBatchSize int `koanf:"ideal-batch-size"` MinBatchesBeforeFork int `koanf:"min-batches-before-fork"` + Convert bool `koanf:"convert"` + Compact bool `koanf:"compact"` Verify int `koanf:"verify"` - VerifyOnly bool `koanf:"verify-only"` LogLevel int `koanf:"log-level"` } @@ -40,8 +42,9 @@ var DefaultDBConvConfig = DBConvConfig{ IdealBatchSize: 100 * 1024 * 1024, // 100 MB MinBatchesBeforeFork: 10, Threads: 1, - Verify: 1, - VerifyOnly: false, + Convert: false, + Compact: false, + Verify: 0, LogLevel: int(log.LvlDebug), } @@ -49,11 +52,12 @@ func DBConvConfigAddOptions(f *flag.FlagSet) { DBConfigAddOptions("src", f) DBConfigAddOptions("dst", f) f.Int("threads", DefaultDBConvConfig.Threads, "number of threads to use") - f.Int("ideal-batch-size", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size") // TODO - f.Int("min-batches-before-fork", DefaultDBConvConfig.MinBatchesBeforeFork, "minimal number of batches before forking a thread") // TODO - f.Int("verify", DefaultDBConvConfig.Verify, "enables verification (0 = disabled, 1 = only keys, 2 = keys and values)") // TODO - f.Bool("verify-only", DefaultDBConvConfig.VerifyOnly, "skips conversion, runs verification only") // TODO - f.Int("log-level", DefaultDBConvConfig.LogLevel, "log level (0 crit - 5 trace)") // TODO + f.Int("ideal-batch-size", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size") + f.Int("min-batches-before-fork", DefaultDBConvConfig.MinBatchesBeforeFork, "minimal number of batches before forking a thread") + f.Bool("convert", DefaultDBConvConfig.Convert, "enables conversion step") + f.Bool("compact", DefaultDBConvConfig.Compact, "enables compaction step") + f.Int("verify", DefaultDBConvConfig.Verify, "enables verification step (0 = disabled, 1 = only keys, 2 = keys and values)") + f.Int("log-level", DefaultDBConvConfig.LogLevel, "log level (0 crit - 5 trace)") } func (c *DBConvConfig) Validate() error { @@ -63,9 +67,8 @@ func (c *DBConvConfig) Validate() error { if c.Verify < 0 || c.Verify > 2 { return fmt.Errorf("Invalid verify config value: %v", c.Verify) } - if c.VerifyOnly && c.Verify == 0 { - log.Info("enabling keys verification as --verify-only flag is set") - c.Verify = 1 + if !c.Convert && c.Verify == 0 && !c.Compact { + return errors.New("nothing to be done, conversion, verification and compaction disabled") } return nil } diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index 8674b73a2e..e188352824 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -7,6 +7,7 @@ import ( "fmt" "math/big" "sync" + "time" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" @@ -213,6 +214,22 @@ drainLoop: return nil } +func (c *DBConverter) CompactDestination() error { + var err error + c.dst, err = openDB(&c.config.Dst, false) + if err != nil { + return err + } + defer c.dst.Close() + start := time.Now() + log.Info("Compacting destination database", "data", c.config.Dst.Data) + if err := c.dst.Compact(nil, nil); err != nil { + return err + } + log.Info("Compaction done", "elapsed", time.Since(start)) + return nil +} + func (c *DBConverter) Verify(ctx context.Context) error { if c.config.Verify == 1 { log.Info("Starting quick verification - verifying only keys existence") @@ -234,7 +251,7 @@ func (c *DBConverter) Verify(ctx context.Context) error { c.stats.AddThread() it := c.src.NewIterator(nil, nil) defer it.Release() - for it.Next() { + for it.Next() && ctx.Err() == nil { switch c.config.Verify { case 1: if has, err := c.dst.Has(it.Key()); !has { @@ -256,7 +273,7 @@ func (c *DBConverter) Verify(ctx context.Context) error { c.stats.AddEntries(1) } c.stats.DecThread() - return nil + return ctx.Err() } func (c *DBConverter) Stats() *Stats { diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index c0f81eed4b..6e302352b2 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -28,14 +28,9 @@ func parseDBConv(args []string) (*dbconv.DBConvConfig, error) { } func printSampleUsage(name string) { - fmt.Printf("Sample usage: %s [OPTIONS] \n\n", name) - fmt.Printf("Options:\n") - fmt.Printf(" --help\n") - fmt.Printf(" --src.db-engine \n") - fmt.Printf(" --src.data \n") - fmt.Printf(" --dst.db-engine \n") - fmt.Printf(" --dst.data \n") + fmt.Printf("Sample usage: %s --help \n\n", name) } + func main() { args := os.Args[1:] config, err := parseDBConv(args) @@ -57,8 +52,8 @@ func main() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + ticker := time.NewTicker(10 * time.Second) go func() { - ticker := time.NewTicker(5 * time.Second) defer ticker.Stop() for { select { @@ -72,7 +67,7 @@ func main() { } }() - if !config.VerifyOnly { + if config.Convert { err = conv.Convert(ctx) if err != nil { log.Error("Conversion error", "err", err) @@ -82,7 +77,17 @@ func main() { log.Info("Conversion finished.", "entries", stats.Entries(), "MB", stats.Bytes()/1024/1024, "avg e/s", stats.AverageEntriesPerSecond(), "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "elapsed", stats.Elapsed()) } + if config.Compact { + ticker.Stop() + err = conv.CompactDestination() + if err != nil { + log.Error("Compaction error", "err", err) + return + } + } + if config.Verify > 0 { + ticker.Reset(10 * time.Second) err = conv.Verify(ctx) if err != nil { log.Error("Verification error", "err", err) From 83f1f57a31e69d4bd6c318affb88a998df94f32e Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 4 Jan 2024 19:29:23 +0000 Subject: [PATCH 14/58] clean ':' from log --- cmd/dbconv/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index 6e302352b2..d2570ae7ce 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -94,6 +94,6 @@ func main() { return } stats := conv.Stats() - log.Info("Verification completed successfully.", "elapsed:", stats.Elapsed()) + log.Info("Verification completed successfully.", "elapsed", stats.Elapsed()) } } From 31e5b8c6606a8f4aef35fde63f337348ab4c2bac Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Fri, 5 Jan 2024 16:35:16 +0000 Subject: [PATCH 15/58] stop progress printing during compaction --- cmd/dbconv/dbconv/config.go | 2 +- cmd/dbconv/main.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index ca936349e9..141a8d297c 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -40,7 +40,7 @@ type DBConvConfig struct { var DefaultDBConvConfig = DBConvConfig{ IdealBatchSize: 100 * 1024 * 1024, // 100 MB - MinBatchesBeforeFork: 10, + MinBatchesBeforeFork: 2, Threads: 1, Convert: false, Compact: false, diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index d2570ae7ce..7312a6c2e7 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -59,7 +59,7 @@ func main() { select { case <-ticker.C: stats := conv.Stats() - fmt.Printf("Progress:\n\tprocessed entries: %v\n\tprocessed data (MB): %v\n\telapsed: %v\n\tcurrent:\tentr/s: %v\tMB/s: %v\n\taverage:\tentr/s: %v\tMB/s: %v\n\tthreads: %v\tforks: %v\n", stats.Entries(), stats.Bytes()/1024/1024, stats.Elapsed(), stats.EntriesPerSecond(), stats.BytesPerSecond()/1024/1024, stats.AverageEntriesPerSecond(), stats.AverageBytesPerSecond()/1024/1024, stats.Threads(), stats.Forks()) + fmt.Printf("Progress:\n\tprocessed entries: %v\n\tprocessed data (MB): %v\n\telapsed: %v\n\tcurrent:\tMe/s: %v\tMB/s: %v\n\taverage:\tMe/s: %v\tMB/s: %v\n\tthreads: %v\tforks: %v\n", stats.Entries(), stats.Bytes()/1024/1024, stats.Elapsed(), stats.EntriesPerSecond()/1000/1000, stats.BytesPerSecond()/1024/1024, stats.AverageEntriesPerSecond()/1000/1000, stats.AverageBytesPerSecond()/1024/1024, stats.Threads(), stats.Forks()) case <-ctx.Done(): return @@ -74,7 +74,7 @@ func main() { return } stats := conv.Stats() - log.Info("Conversion finished.", "entries", stats.Entries(), "MB", stats.Bytes()/1024/1024, "avg e/s", stats.AverageEntriesPerSecond(), "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "elapsed", stats.Elapsed()) + log.Info("Conversion finished.", "entries", stats.Entries(), "MB", stats.Bytes()/1024/1024, "avg Me/s", stats.AverageEntriesPerSecond()/1000/1000, "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "elapsed", stats.Elapsed()) } if config.Compact { From e779fe709937c49b7d945be3cee2a5a278c1fcd1 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Mon, 8 Jan 2024 12:51:01 +0000 Subject: [PATCH 16/58] change unit of entries per second --- cmd/dbconv/main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index 7312a6c2e7..8cfbfd5022 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -59,7 +59,7 @@ func main() { select { case <-ticker.C: stats := conv.Stats() - fmt.Printf("Progress:\n\tprocessed entries: %v\n\tprocessed data (MB): %v\n\telapsed: %v\n\tcurrent:\tMe/s: %v\tMB/s: %v\n\taverage:\tMe/s: %v\tMB/s: %v\n\tthreads: %v\tforks: %v\n", stats.Entries(), stats.Bytes()/1024/1024, stats.Elapsed(), stats.EntriesPerSecond()/1000/1000, stats.BytesPerSecond()/1024/1024, stats.AverageEntriesPerSecond()/1000/1000, stats.AverageBytesPerSecond()/1024/1024, stats.Threads(), stats.Forks()) + fmt.Printf("Progress:\n\tprocessed entries: %v\n\tprocessed data (MB): %v\n\telapsed: %v\n\tcurrent:\tKe/s: %v\tMB/s: %v\n\taverage:\tKe/s: %v\tMB/s: %v\n\tthreads: %v\tforks: %v\n", stats.Entries(), stats.Bytes()/1024/1024, stats.Elapsed(), stats.EntriesPerSecond()/1000, stats.BytesPerSecond()/1024/1024, stats.AverageEntriesPerSecond()/1000, stats.AverageBytesPerSecond()/1024/1024, stats.Threads(), stats.Forks()) case <-ctx.Done(): return @@ -74,7 +74,7 @@ func main() { return } stats := conv.Stats() - log.Info("Conversion finished.", "entries", stats.Entries(), "MB", stats.Bytes()/1024/1024, "avg Me/s", stats.AverageEntriesPerSecond()/1000/1000, "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "elapsed", stats.Elapsed()) + log.Info("Conversion finished.", "entries", stats.Entries(), "MB", stats.Bytes()/1024/1024, "avg Ke/s", stats.AverageEntriesPerSecond()/1000, "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "elapsed", stats.Elapsed()) } if config.Compact { From bc41c66ad4375ac1a1ac26b2cf255993323eb16c Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Mon, 8 Jan 2024 15:07:13 +0000 Subject: [PATCH 17/58] shorten dbconv test --- cmd/dbconv/dbconv/dbconv_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/dbconv/dbconv/dbconv_test.go b/cmd/dbconv/dbconv/dbconv_test.go index 66662e02ac..53c635e487 100644 --- a/cmd/dbconv/dbconv/dbconv_test.go +++ b/cmd/dbconv/dbconv/dbconv_test.go @@ -86,9 +86,9 @@ func TestConversion(t *testing.T) { config := DefaultDBConvConfig config.Src = oldDBConfig config.Dst = newDBConfig - config.Threads = 512 - config.IdealBatchSize = 100 - config.MinBatchesBeforeFork = 10 + config.Threads = 16 + config.IdealBatchSize = 512 + config.MinBatchesBeforeFork = 3 conv := NewDBConverter(&config) ctx, cancel := context.WithCancel(context.Background()) defer cancel() From 39a53116a207b58996c8a3b4c410960c4f6d197a Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 10 Jan 2024 17:05:05 +0000 Subject: [PATCH 18/58] add dbconv to Makefile --- Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4221100961..08fb843ab8 100644 --- a/Makefile +++ b/Makefile @@ -88,7 +88,7 @@ push: lint test-go .make/fmt all: build build-replay-env test-gen-proofs @touch .make/all -build: $(patsubst %,$(output_root)/bin/%, nitro deploy relay daserver datool seq-coordinator-invalidate nitro-val seq-coordinator-manager) +build: $(patsubst %,$(output_root)/bin/%, nitro deploy relay daserver datool seq-coordinator-invalidate nitro-val seq-coordinator-manager dbconv) @printf $(done) build-node-deps: $(go_source) build-prover-header build-prover-lib build-jit .make/solgen .make/cbrotli-lib @@ -188,6 +188,9 @@ $(output_root)/bin/nitro-val: $(DEP_PREDICATE) build-node-deps $(output_root)/bin/seq-coordinator-manager: $(DEP_PREDICATE) build-node-deps go build $(GOLANG_PARAMS) -o $@ "$(CURDIR)/cmd/seq-coordinator-manager" +$(output_root)/bin/dbconv: $(DEP_PREDICATE) build-node-deps + go build $(GOLANG_PARAMS) -o $@ "$(CURDIR)/cmd/dbconv" + # recompile wasm, but don't change timestamp unless files differ $(replay_wasm): $(DEP_PREDICATE) $(go_source) .make/solgen mkdir -p `dirname $(replay_wasm)` From dd33ae287703c466f4e80cf9d9e2871d78bc2469 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 11 Jan 2024 02:17:03 +0000 Subject: [PATCH 19/58] add dbconv to docker --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index b62e569259..78fa3399ed 100644 --- a/Dockerfile +++ b/Dockerfile @@ -208,6 +208,7 @@ COPY --from=node-builder /workspace/target/bin/nitro /usr/local/bin/ COPY --from=node-builder /workspace/target/bin/relay /usr/local/bin/ COPY --from=node-builder /workspace/target/bin/nitro-val /usr/local/bin/ COPY --from=node-builder /workspace/target/bin/seq-coordinator-manager /usr/local/bin/ +COPY --from=node-builder /workspace/target/bin/dbconv /usr/local/bin/ COPY --from=machine-versions /workspace/machines /home/user/target/machines USER root RUN export DEBIAN_FRONTEND=noninteractive && \ From 48f5c849abf985e1ea2992017ed3cdab687bf8e0 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Fri, 5 Apr 2024 14:01:54 +0100 Subject: [PATCH 20/58] cmd/dbconv: add metrics --- cmd/dbconv/dbconv/config.go | 40 ++++++++++++++++++++++--------------- cmd/dbconv/dbconv/dbconv.go | 2 +- cmd/dbconv/main.go | 8 ++++++++ 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index 141a8d297c..0d622637bf 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -5,37 +5,42 @@ import ( "fmt" "github.com/ethereum/go-ethereum/log" + "github.com/offchainlabs/nitro/cmd/genericconf" flag "github.com/spf13/pflag" ) type DBConfig struct { - Data string `koanf:"data"` - DBEngine string `koanf:"db-engine"` - Handles int `koanf:"handles"` - Cache int `koanf:"cache"` + Data string `koanf:"data"` + DBEngine string `koanf:"db-engine"` + Handles int `koanf:"handles"` + Cache int `koanf:"cache"` + Namespace string `koanf:"namespace"` } // TODO var DBConfigDefault = DBConfig{} -func DBConfigAddOptions(prefix string, f *flag.FlagSet) { +func DBConfigAddOptions(prefix string, f *flag.FlagSet, defaultNamespace string) { // TODO f.String(prefix+".data", DBConfigDefault.Data, "directory of stored chain state") f.String(prefix+".db-engine", DBConfigDefault.DBEngine, "backing database implementation to use ('leveldb' or 'pebble')") f.Int(prefix+".handles", DBConfigDefault.Handles, "number of file descriptor handles to use for the database") f.Int(prefix+".cache", DBConfigDefault.Cache, "the capacity(in megabytes) of the data caching") + f.String(prefix+".namespace", defaultNamespace, "metrics namespace") } type DBConvConfig struct { - Src DBConfig `koanf:"src"` - Dst DBConfig `koanf:"dst"` - Threads int `koanf:"threads"` - IdealBatchSize int `koanf:"ideal-batch-size"` - MinBatchesBeforeFork int `koanf:"min-batches-before-fork"` - Convert bool `koanf:"convert"` - Compact bool `koanf:"compact"` - Verify int `koanf:"verify"` - LogLevel int `koanf:"log-level"` + Src DBConfig `koanf:"src"` + Dst DBConfig `koanf:"dst"` + Threads int `koanf:"threads"` + IdealBatchSize int `koanf:"ideal-batch-size"` + MinBatchesBeforeFork int `koanf:"min-batches-before-fork"` + Convert bool `koanf:"convert"` + Compact bool `koanf:"compact"` + Verify int `koanf:"verify"` + LogLevel int `koanf:"log-level"` + Metrics bool `koanf:"metrics"` + MetricsServer genericconf.MetricsServerConfig `koanf:"metrics-server"` } var DefaultDBConvConfig = DBConvConfig{ @@ -46,11 +51,12 @@ var DefaultDBConvConfig = DBConvConfig{ Compact: false, Verify: 0, LogLevel: int(log.LvlDebug), + Metrics: false, } func DBConvConfigAddOptions(f *flag.FlagSet) { - DBConfigAddOptions("src", f) - DBConfigAddOptions("dst", f) + DBConfigAddOptions("src", f, "srcdb/") + DBConfigAddOptions("dst", f, "destdb/") f.Int("threads", DefaultDBConvConfig.Threads, "number of threads to use") f.Int("ideal-batch-size", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size") f.Int("min-batches-before-fork", DefaultDBConvConfig.MinBatchesBeforeFork, "minimal number of batches before forking a thread") @@ -58,6 +64,8 @@ func DBConvConfigAddOptions(f *flag.FlagSet) { f.Bool("compact", DefaultDBConvConfig.Compact, "enables compaction step") f.Int("verify", DefaultDBConvConfig.Verify, "enables verification step (0 = disabled, 1 = only keys, 2 = keys and values)") f.Int("log-level", DefaultDBConvConfig.LogLevel, "log level (0 crit - 5 trace)") + f.Bool("metrics", DefaultDBConvConfig.Metrics, "enable metrics") + genericconf.MetricsServerAddOptions("metrics-server", f) } func (c *DBConvConfig) Validate() error { diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index e188352824..b01fc9a1fb 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -33,7 +33,7 @@ func openDB(config *DBConfig, readonly bool) (ethdb.Database, error) { Type: config.DBEngine, Directory: config.Data, AncientsDirectory: "", // don't open freezer - Namespace: "", // TODO do we need metrics namespace? + Namespace: config.Namespace, Cache: config.Cache, Handles: config.Handles, ReadOnly: readonly, diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index 8cfbfd5022..298eace73a 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -7,6 +7,8 @@ import ( "time" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/metrics/exp" "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" "github.com/offchainlabs/nitro/cmd/genericconf" "github.com/offchainlabs/nitro/cmd/util/confighelpers" @@ -48,6 +50,12 @@ func main() { log.Error("Invalid config", "err", err) return } + + if config.Metrics { + go metrics.CollectProcessMetrics(config.MetricsServer.UpdateInterval) + exp.Setup(fmt.Sprintf("%v:%v", config.MetricsServer.Addr, config.MetricsServer.Port)) + } + conv := dbconv.NewDBConverter(config) ctx, cancel := context.WithCancel(context.Background()) defer cancel() From c3b4a19ef40ebf57f6824356f07752eb0977769f Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 24 Apr 2024 15:18:03 +0200 Subject: [PATCH 21/58] dbconv: add pebble config options --- cmd/dbconv/dbconv/config.go | 14 +++++++++----- cmd/dbconv/dbconv/dbconv.go | 15 ++++++++------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index 0d622637bf..ad9e66961e 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -5,16 +5,18 @@ import ( "fmt" "github.com/ethereum/go-ethereum/log" + "github.com/offchainlabs/nitro/cmd/conf" "github.com/offchainlabs/nitro/cmd/genericconf" flag "github.com/spf13/pflag" ) type DBConfig struct { - Data string `koanf:"data"` - DBEngine string `koanf:"db-engine"` - Handles int `koanf:"handles"` - Cache int `koanf:"cache"` - Namespace string `koanf:"namespace"` + Data string `koanf:"data"` + DBEngine string `koanf:"db-engine"` + Handles int `koanf:"handles"` + Cache int `koanf:"cache"` + Namespace string `koanf:"namespace"` + Pebble conf.PebbleConfig `koanf:"pebble"` } // TODO @@ -27,6 +29,7 @@ func DBConfigAddOptions(prefix string, f *flag.FlagSet, defaultNamespace string) f.Int(prefix+".handles", DBConfigDefault.Handles, "number of file descriptor handles to use for the database") f.Int(prefix+".cache", DBConfigDefault.Cache, "the capacity(in megabytes) of the data caching") f.String(prefix+".namespace", defaultNamespace, "metrics namespace") + conf.PebbleConfigAddOptions(prefix+".pebble", f) } type DBConvConfig struct { @@ -52,6 +55,7 @@ var DefaultDBConvConfig = DBConvConfig{ Verify: 0, LogLevel: int(log.LvlDebug), Metrics: false, + MetricsServer: genericconf.MetricsServerConfigDefault, } func DBConvConfigAddOptions(f *flag.FlagSet) { diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index b01fc9a1fb..2f9cf3ed99 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -30,13 +30,14 @@ func NewDBConverter(config *DBConvConfig) *DBConverter { func openDB(config *DBConfig, readonly bool) (ethdb.Database, error) { return rawdb.Open(rawdb.OpenOptions{ - Type: config.DBEngine, - Directory: config.Data, - AncientsDirectory: "", // don't open freezer - Namespace: config.Namespace, - Cache: config.Cache, - Handles: config.Handles, - ReadOnly: readonly, + Type: config.DBEngine, + Directory: config.Data, + AncientsDirectory: "", // don't open freezer + Namespace: config.Namespace, + Cache: config.Cache, + Handles: config.Handles, + ReadOnly: readonly, + PebbleExtraOptions: config.Pebble.ExtraOptions(), }) } From fc98de65953c41dcd09023d73666ffea2da96e07 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 4 Jun 2024 14:32:27 +0200 Subject: [PATCH 22/58] cmd/dbconv: remove multithreading option, update pebble extra options usage --- cmd/dbconv/dbconv/config.go | 56 +++++----- cmd/dbconv/dbconv/dbconv.go | 181 ++++--------------------------- cmd/dbconv/dbconv/dbconv_test.go | 68 ++---------- cmd/dbconv/dbconv/stats.go | 13 --- cmd/dbconv/main.go | 6 +- 5 files changed, 57 insertions(+), 267 deletions(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index ad9e66961e..256c8020fb 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -4,9 +4,9 @@ import ( "errors" "fmt" - "github.com/ethereum/go-ethereum/log" "github.com/offchainlabs/nitro/cmd/conf" "github.com/offchainlabs/nitro/cmd/genericconf" + "github.com/offchainlabs/nitro/execution/gethexec" flag "github.com/spf13/pflag" ) @@ -19,11 +19,13 @@ type DBConfig struct { Pebble conf.PebbleConfig `koanf:"pebble"` } -// TODO -var DBConfigDefault = DBConfig{} +var DBConfigDefault = DBConfig{ + Handles: conf.PersistentConfigDefault.Handles, + Cache: gethexec.DefaultCachingConfig.DatabaseCache, + Pebble: conf.PebbleConfigDefault, +} func DBConfigAddOptions(prefix string, f *flag.FlagSet, defaultNamespace string) { - // TODO f.String(prefix+".data", DBConfigDefault.Data, "directory of stored chain state") f.String(prefix+".db-engine", DBConfigDefault.DBEngine, "backing database implementation to use ('leveldb' or 'pebble')") f.Int(prefix+".handles", DBConfigDefault.Handles, "number of file descriptor handles to use for the database") @@ -33,49 +35,43 @@ func DBConfigAddOptions(prefix string, f *flag.FlagSet, defaultNamespace string) } type DBConvConfig struct { - Src DBConfig `koanf:"src"` - Dst DBConfig `koanf:"dst"` - Threads int `koanf:"threads"` - IdealBatchSize int `koanf:"ideal-batch-size"` - MinBatchesBeforeFork int `koanf:"min-batches-before-fork"` - Convert bool `koanf:"convert"` - Compact bool `koanf:"compact"` - Verify int `koanf:"verify"` - LogLevel int `koanf:"log-level"` - Metrics bool `koanf:"metrics"` - MetricsServer genericconf.MetricsServerConfig `koanf:"metrics-server"` + Src DBConfig `koanf:"src"` + Dst DBConfig `koanf:"dst"` + IdealBatchSize int `koanf:"ideal-batch-size"` + Convert bool `koanf:"convert"` + Compact bool `koanf:"compact"` + Verify int `koanf:"verify"` + LogLevel string `koanf:"log-level"` + LogType string `koanf:"log-type"` + Metrics bool `koanf:"metrics"` + MetricsServer genericconf.MetricsServerConfig `koanf:"metrics-server"` } var DefaultDBConvConfig = DBConvConfig{ - IdealBatchSize: 100 * 1024 * 1024, // 100 MB - MinBatchesBeforeFork: 2, - Threads: 1, - Convert: false, - Compact: false, - Verify: 0, - LogLevel: int(log.LvlDebug), - Metrics: false, - MetricsServer: genericconf.MetricsServerConfigDefault, + IdealBatchSize: 100 * 1024 * 1024, // 100 MB + Convert: false, + Compact: false, + Verify: 0, + LogLevel: "INFO", + LogType: "plaintext", + Metrics: false, + MetricsServer: genericconf.MetricsServerConfigDefault, } func DBConvConfigAddOptions(f *flag.FlagSet) { DBConfigAddOptions("src", f, "srcdb/") DBConfigAddOptions("dst", f, "destdb/") - f.Int("threads", DefaultDBConvConfig.Threads, "number of threads to use") f.Int("ideal-batch-size", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size") - f.Int("min-batches-before-fork", DefaultDBConvConfig.MinBatchesBeforeFork, "minimal number of batches before forking a thread") f.Bool("convert", DefaultDBConvConfig.Convert, "enables conversion step") f.Bool("compact", DefaultDBConvConfig.Compact, "enables compaction step") f.Int("verify", DefaultDBConvConfig.Verify, "enables verification step (0 = disabled, 1 = only keys, 2 = keys and values)") - f.Int("log-level", DefaultDBConvConfig.LogLevel, "log level (0 crit - 5 trace)") + f.String("log-level", DefaultDBConvConfig.LogLevel, "log level, valid values are CRIT, ERROR, WARN, INFO, DEBUG, TRACE") + f.String("log-type", DefaultDBConvConfig.LogType, "log type (plaintext or json)") f.Bool("metrics", DefaultDBConvConfig.Metrics, "enable metrics") genericconf.MetricsServerAddOptions("metrics-server", f) } func (c *DBConvConfig) Validate() error { - if c.Threads < 0 { - return fmt.Errorf("Invalid threads number: %v", c.Threads) - } if c.Verify < 0 || c.Verify > 2 { return fmt.Errorf("Invalid verify config value: %v", c.Verify) } diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index 2f9cf3ed99..e51f4d2e20 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -3,10 +3,7 @@ package dbconv import ( "bytes" "context" - "errors" "fmt" - "math/big" - "sync" "time" "github.com/ethereum/go-ethereum/core/rawdb" @@ -28,7 +25,7 @@ func NewDBConverter(config *DBConvConfig) *DBConverter { } } -func openDB(config *DBConfig, readonly bool) (ethdb.Database, error) { +func openDB(config *DBConfig, name string, readonly bool) (ethdb.Database, error) { return rawdb.Open(rawdb.OpenOptions{ Type: config.DBEngine, Directory: config.Data, @@ -37,118 +34,39 @@ func openDB(config *DBConfig, readonly bool) (ethdb.Database, error) { Cache: config.Cache, Handles: config.Handles, ReadOnly: readonly, - PebbleExtraOptions: config.Pebble.ExtraOptions(), + PebbleExtraOptions: config.Pebble.ExtraOptions(name), }) } -func middleKey(start []byte, end []byte) []byte { - if len(end) == 0 { - end = make([]byte, len(start)) - for i := range end { - end[i] = 0xff - } - } - if len(start) > len(end) { - tmp := make([]byte, len(start)) - copy(tmp, end) - end = tmp - } else if len(start) < len(end) { - tmp := make([]byte, len(end)) - copy(tmp, start) - start = tmp +func (c *DBConverter) Convert(ctx context.Context) error { + var err error + defer c.Close() + c.src, err = openDB(&c.config.Src, "src", true) + if err != nil { + return err } - s := new(big.Int).SetBytes(start) - e := new(big.Int).SetBytes(end) - sum := new(big.Int).Add(s, e) - var m big.Int - var mid []byte - if sum.Bit(0) == 1 { - m.Lsh(sum, 7) - mid = make([]byte, len(start)+1) - } else { - m.Rsh(sum, 1) - mid = make([]byte, len(start)) + c.dst, err = openDB(&c.config.Dst, "dst", false) + if err != nil { + return err } - m.FillBytes(mid) - return mid -} - -func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, wg *sync.WaitGroup, results chan error) { - log.Debug("new conversion worker", "start", start, "end", end) - c.stats.AddThread() - it := c.src.NewIterator(nil, start) + c.stats.Reset() + it := c.src.NewIterator(nil, nil) defer it.Release() - var err error - defer func() { - results <- err - }() - batch := c.dst.NewBatch() - // TODO support restarting in case of an interruption - n := 0 - f := 0 - canFork := true entriesInBatch := 0 - batchesSinceLastFork := 0 for it.Next() && ctx.Err() == nil { - key := it.Key() - n++ - if len(end) > 0 && bytes.Compare(key, end) >= 0 { - break - } - if err = batch.Put(key, it.Value()); err != nil { - return + if err = batch.Put(it.Key(), it.Value()); err != nil { + return err } entriesInBatch++ if batchSize := batch.ValueSize(); batchSize >= c.config.IdealBatchSize { if err = batch.Write(); err != nil { - return + return err } c.stats.AddEntries(int64(entriesInBatch)) c.stats.AddBytes(int64(batchSize)) - entriesInBatch = 0 batch.Reset() - batchesSinceLastFork++ - } - if canFork && batchesSinceLastFork >= c.config.MinBatchesBeforeFork { - select { - case err = <-results: - if err != nil { - return - } - if err = ctx.Err(); err != nil { - return - } - middle := middleKey(key, end) - if bytes.Compare(middle, key) > 0 && (len(end) == 0 || bytes.Compare(middle, end) < 0) { - // find next existing key after the middle to prevent the keys from growing too long - m := c.src.NewIterator(nil, middle) - if m.Next() { - foundMiddle := m.Key() - if len(end) == 0 || bytes.Compare(foundMiddle, end) < 0 { - wg.Add(1) - go c.copyEntries(ctx, foundMiddle, end, wg, results) - middle = foundMiddle - batchesSinceLastFork = 0 - c.stats.AddFork() - f++ - } else { - // no entries either after the middle key or for the middle key - results <- nil - } - } else { - // no entries either after the middle key or for the middle key - results <- nil - } - end = middle - m.Release() - } else { - log.Warn("no more forking", "key", key, "middle", middle, "end", end) - canFork = false - results <- nil - } - default: - } + entriesInBatch = 0 } } if err = ctx.Err(); err == nil { @@ -157,67 +75,12 @@ func (c *DBConverter) copyEntries(ctx context.Context, start []byte, end []byte, c.stats.AddEntries(int64(entriesInBatch)) c.stats.AddBytes(int64(batchSize)) } - log.Debug("worker done", "start", start, "end", end, "n", n, "forked", f) - c.stats.DecThread() - wg.Done() -} - -func (c *DBConverter) Convert(ctx context.Context) error { - var err error - defer c.Close() - c.src, err = openDB(&c.config.Src, true) - if err != nil { - return err - } - c.dst, err = openDB(&c.config.Dst, false) - if err != nil { - return err - } - // TODO - if c.config.Threads <= 0 { - return errors.New("invalid threads count") - } - - c.stats.Reset() - - // copy empty key entry - if has, _ := c.src.Has([]byte{}); has { - value, err := c.src.Get([]byte{}) - if err != nil { - return fmt.Errorf("Source database: failed to get value for an empty key: %w", err) - } - err = c.dst.Put([]byte{}, value) - if err != nil { - return fmt.Errorf("Destination database: failed to put value for an empty key: %w", err) - } - c.stats.AddEntries(1) - c.stats.AddBytes(int64(len(value))) // adding only value len as key is empty - } - results := make(chan error, c.config.Threads) - for i := 0; i < c.config.Threads-1; i++ { - results <- nil - } - var wg sync.WaitGroup - wg.Add(1) - go c.copyEntries(ctx, []byte{0}, nil, &wg, results) - wg.Wait() -drainLoop: - for { - select { - case err = <-results: - if err != nil { - return err - } - default: - break drainLoop - } - } - return nil + return err } func (c *DBConverter) CompactDestination() error { var err error - c.dst, err = openDB(&c.config.Dst, false) + c.dst, err = openDB(&c.config.Dst, "dst", false) if err != nil { return err } @@ -239,17 +102,16 @@ func (c *DBConverter) Verify(ctx context.Context) error { } var err error defer c.Close() - c.src, err = openDB(&c.config.Src, true) + c.src, err = openDB(&c.config.Src, "src", true) if err != nil { return err } - c.dst, err = openDB(&c.config.Dst, true) + c.dst, err = openDB(&c.config.Dst, "dst", true) if err != nil { return err } c.stats.Reset() - c.stats.AddThread() it := c.src.NewIterator(nil, nil) defer it.Release() for it.Next() && ctx.Err() == nil { @@ -273,7 +135,6 @@ func (c *DBConverter) Verify(ctx context.Context) error { } c.stats.AddEntries(1) } - c.stats.DecThread() return ctx.Err() } diff --git a/cmd/dbconv/dbconv/dbconv_test.go b/cmd/dbconv/dbconv/dbconv_test.go index 53c635e487..a0aca86330 100644 --- a/cmd/dbconv/dbconv/dbconv_test.go +++ b/cmd/dbconv/dbconv/dbconv_test.go @@ -3,52 +3,12 @@ package dbconv import ( "bytes" "context" - "math/big" - "math/rand" "testing" "github.com/ethereum/go-ethereum/log" "github.com/offchainlabs/nitro/util/testhelpers" ) -func TestMiddleKey(t *testing.T) { - triples := [][]byte{ - {0}, {0, 0}, {0, 0}, - {1}, {1, 1}, {1, 0, 128}, - {1}, {1, 0}, {1, 0}, - {1}, {2}, {1, 128}, - {1}, {2, 1}, {1, 128, 128}, - {0}, {255}, {127, 128}, - {0}, {}, {127, 128}, - {0, 0}, {}, {127, 255, 128}, - {1, 1}, {2}, {1, 128, 128}, - } - for i := 0; i < len(triples)-2; i += 3 { - start, end, expected := triples[i], triples[i+1], triples[i+2] - if mid := middleKey(start, end); !bytes.Equal(mid, expected) { - Fail(t, "Unexpected result for start:", start, "end:", end, "want:", expected, "have:", mid) - } - } - - for i := int64(0); i < 1000-1; i++ { - for j := int64(0); j < 1000; j++ { - start := big.NewInt(i).Bytes() - end := big.NewInt(j).Bytes() - if bytes.Compare(start, end) > 0 { - start, end = end, start - } - middle := middleKey(start, end) - if bytes.Compare(middle, start) < 0 { - Fail(t, "middle < start, start:", start, "end:", end, "middle:", middle) - } - if bytes.Compare(middle, end) > 0 { - Fail(t, "middle > end, start:", start, "end:", end, "middle:", middle) - } - } - } - -} - func TestConversion(t *testing.T) { _ = testhelpers.InitTestLog(t, log.LvlTrace) oldDBConfig := DBConfigDefault @@ -60,25 +20,13 @@ func TestConversion(t *testing.T) { newDBConfig.DBEngine = "pebble" func() { - oldDb, err := openDB(&oldDBConfig, false) - Require(t, err) + oldDb, err := openDB(&oldDBConfig, "", false) defer oldDb.Close() - for i := 0; i < 0xfe; i++ { - data := []byte{byte(i)} - err = oldDb.Put(data, data) - Require(t, err) - for j := 0; j < 0xf; j++ { - data := []byte{byte(i), byte(j)} - err = oldDb.Put(data, data) - Require(t, err) - } - } + Require(t, err) err = oldDb.Put([]byte{}, []byte{0xde, 0xed, 0xbe, 0xef}) Require(t, err) - for i := 0; i < 100000; i++ { - size := 1 + rand.Uint64()%100 - randomBytes := testhelpers.RandomizeSlice(make([]byte, size)) - err = oldDb.Put(randomBytes, []byte{byte(i)}) + for i := 0; i < 20; i++ { + err = oldDb.Put([]byte{byte(i)}, []byte{byte(i + 1)}) Require(t, err) } }() @@ -86,9 +34,7 @@ func TestConversion(t *testing.T) { config := DefaultDBConvConfig config.Src = oldDBConfig config.Dst = newDBConfig - config.Threads = 16 - config.IdealBatchSize = 512 - config.MinBatchesBeforeFork = 3 + config.IdealBatchSize = 5 conv := NewDBConverter(&config) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -96,10 +42,10 @@ func TestConversion(t *testing.T) { Require(t, err) conv.Close() - oldDb, err := openDB(&oldDBConfig, true) + oldDb, err := openDB(&oldDBConfig, "", true) Require(t, err) defer oldDb.Close() - newDb, err := openDB(&newDBConfig, true) + newDb, err := openDB(&newDBConfig, "", true) Require(t, err) defer newDb.Close() diff --git a/cmd/dbconv/dbconv/stats.go b/cmd/dbconv/dbconv/stats.go index 04e239a51f..b56289d06c 100644 --- a/cmd/dbconv/dbconv/stats.go +++ b/cmd/dbconv/dbconv/stats.go @@ -9,7 +9,6 @@ type Stats struct { entries atomic.Int64 bytes atomic.Int64 forks atomic.Int64 - threads atomic.Int64 startTimestamp int64 prevEntires int64 @@ -23,7 +22,6 @@ func (s *Stats) Reset() { s.entries.Store(0) s.bytes.Store(0) s.forks.Store(0) - s.threads.Store(0) s.startTimestamp = now s.prevEntires = 0 s.prevBytes = 0 @@ -55,17 +53,6 @@ func (s *Stats) Forks() int64 { return s.forks.Load() } -func (s *Stats) AddThread() { - s.threads.Add(1) -} -func (s *Stats) DecThread() { - s.threads.Add(-1) -} - -func (s *Stats) Threads() int64 { - return s.threads.Load() -} - func (s *Stats) Elapsed() time.Duration { now := time.Now().UnixNano() dt := now - s.startTimestamp diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index 298eace73a..4782e8b38d 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -40,9 +40,9 @@ func main() { confighelpers.PrintErrorAndExit(err, printSampleUsage) return } - err = genericconf.InitLog("plaintext", log.Lvl(config.LogLevel), &genericconf.FileLoggingConfig{Enable: false}, nil) + err = genericconf.InitLog(config.LogType, config.LogLevel, &genericconf.FileLoggingConfig{Enable: false}, nil) if err != nil { - log.Error("Failed to init logging", "err", err) + fmt.Fprintf(os.Stderr, "Error initializing logging: %v\n", err) return } @@ -67,7 +67,7 @@ func main() { select { case <-ticker.C: stats := conv.Stats() - fmt.Printf("Progress:\n\tprocessed entries: %v\n\tprocessed data (MB): %v\n\telapsed: %v\n\tcurrent:\tKe/s: %v\tMB/s: %v\n\taverage:\tKe/s: %v\tMB/s: %v\n\tthreads: %v\tforks: %v\n", stats.Entries(), stats.Bytes()/1024/1024, stats.Elapsed(), stats.EntriesPerSecond()/1000, stats.BytesPerSecond()/1024/1024, stats.AverageEntriesPerSecond()/1000, stats.AverageBytesPerSecond()/1024/1024, stats.Threads(), stats.Forks()) + fmt.Printf("Progress:\n\tprocessed entries: %v\n\tprocessed data (MB): %v\n\telapsed: %v\n\tcurrent:\tKe/s: %v\tMB/s: %v\n\taverage:\tKe/s: %v\tMB/s: %v\n", stats.Entries(), stats.Bytes()/1024/1024, stats.Elapsed(), stats.EntriesPerSecond()/1000, stats.BytesPerSecond()/1024/1024, stats.AverageEntriesPerSecond()/1000, stats.AverageBytesPerSecond()/1024/1024) case <-ctx.Done(): return From d7f5ea96c7c9df3c5c93a5c5794a0ae082fe6ece Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 4 Jun 2024 15:22:17 +0200 Subject: [PATCH 23/58] system_tests: update db_conversion_test --- system_tests/db_conversion_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/system_tests/db_conversion_test.go b/system_tests/db_conversion_test.go index 2e2348ccc4..5a43472031 100644 --- a/system_tests/db_conversion_test.go +++ b/system_tests/db_conversion_test.go @@ -48,7 +48,7 @@ func TestDatabaseConversion(t *testing.T) { t.Log("stopped first node") instanceDir := filepath.Join(dataDir, builder.l2StackConfig.Name) - for _, dbname := range []string{"chaindb", "arbitrumdata"} { + for _, dbname := range []string{"l2chaindata", "arbitrumdata", "wasm"} { err := os.Rename(filepath.Join(instanceDir, dbname), filepath.Join(instanceDir, fmt.Sprintf("%s_old", dbname))) Require(t, err) t.Log("converting:", dbname) @@ -62,7 +62,6 @@ func TestDatabaseConversion(t *testing.T) { convConfig := dbconv.DefaultDBConvConfig convConfig.Src = oldDBConfig convConfig.Dst = newDBConfig - convConfig.Threads = 32 conv := dbconv.NewDBConverter(&convConfig) err := conv.Convert(ctx) Require(t, err) From 5eac1d4554354b4d80b4ed8db598f15c8f95065f Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 4 Jun 2024 17:17:47 +0200 Subject: [PATCH 24/58] cmd/dbconv: format numbers in progress message --- cmd/dbconv/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index 4782e8b38d..bcd41de84f 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -67,7 +67,7 @@ func main() { select { case <-ticker.C: stats := conv.Stats() - fmt.Printf("Progress:\n\tprocessed entries: %v\n\tprocessed data (MB): %v\n\telapsed: %v\n\tcurrent:\tKe/s: %v\tMB/s: %v\n\taverage:\tKe/s: %v\tMB/s: %v\n", stats.Entries(), stats.Bytes()/1024/1024, stats.Elapsed(), stats.EntriesPerSecond()/1000, stats.BytesPerSecond()/1024/1024, stats.AverageEntriesPerSecond()/1000, stats.AverageBytesPerSecond()/1024/1024) + fmt.Printf("Progress:\n\tprocessed entries: %d\n\tprocessed data (MB): %d\n\telapsed: %v\n\tcurrent:\tKe/s: %.3f\tMB/s: %.3f\n\taverage:\tKe/s: %.3f\tMB/s: %.3f\n", stats.Entries(), stats.Bytes()/1024/1024, stats.Elapsed(), stats.EntriesPerSecond()/1000, stats.BytesPerSecond()/1024/1024, stats.AverageEntriesPerSecond()/1000, stats.AverageBytesPerSecond()/1024/1024) case <-ctx.Done(): return From e308cf610e67c35404a94fb191ee22a807392422 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Fri, 7 Jun 2024 01:26:23 +0200 Subject: [PATCH 25/58] scripts: add initial version of convert-databases.bash --- scripts/convert-databases.bash | 138 +++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100755 scripts/convert-databases.bash diff --git a/scripts/convert-databases.bash b/scripts/convert-databases.bash new file mode 100755 index 0000000000..841e0ac229 --- /dev/null +++ b/scripts/convert-databases.bash @@ -0,0 +1,138 @@ +#!/usr/bin/env bash + +set -e + +DEFAULT_DBCONV=/usr/local/bin/dbconv +DEFAULT_SRC=/home/user/.arbitrum/arb1/nitro + +dbconv=$DEFAULT_DBCONV +src=$DEFAULT_SRC +dst= +force=false +skip_existing=false + +checkMissingValue () { + if [[ $1 -eq 0 || $2 == -* ]]; then + echo "missing $3 argument value" + exit 1 + fi +} + +while [[ $# -gt 0 ]]; do + case $1 in + --dbconv) + shift + checkMissingValue $# "$1" "--dbconv" + dbconv=$1 + shift + ;; + --src) + shift + checkMissingValue $# "$1" "--src" + src=$1 + shift + ;; + --dst) + shift + checkMissingValue $# "$1" "--dst" + dst=$1 + shift + ;; + --force) + force=true + shift + ;; + --skip-existing) + skip_existing=true + shift + ;; + *) + echo Usage: $0 \[OPTIONS..\] + echo + echo OPTIONS: + echo "--dbconv dbconv binary path (default: \"$DEFAULT_DBCONV\")" + echo "--src root directory containinig source databases (default: \"$DEFAULT_SRC\")" + echo "--dst destination path" + echo "--force remove destination directory if it exists" + echo "--skip-existing skip convertion of databases which directories already exist in the destination directory" + exit 0 + esac +done + +if ! [ -e "$dbconv" ]; then + echo Error: Invalid dbconv binary path: "$dbconv" does not exist + exit 1 +fi + +if ! [ -d "$src" ]; then + echo Error: Invalid source path: "$src" is not a directory + exit 1 +fi + +if ! [ -d $src/l2chaindata/ancient ]; then + echo Error: Invalid ancient path: $src/l2chaindata/ancient is not a directory +fi + +src=$(realpath $src) +if [ -e "$dst" ] && ! $skip_existing; then + if $force; then + echo == Warning! Destination already exists, --force is set, this will remove all files under the path: "$dst" + read -p "are you sure? [y/n]" -n 1 response + echo + if [[ $response == "y" ]] || [[ $response == "Y" ]]; then + (set -x; rm -r "$dst" || exit 1) + else + exit 0 + fi + else + echo Error: invalid destination path: "$dst" already exists + exit 1 + fi +fi + +if ! [ -e $dst/l2chaindata ]; then + echo "== Converting l2chaindata db" + (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/l2chaindata --dst.db-engine="pebble" --dst.data $dst/l2chaindata --convert --compact) + echo "== Copying l2chaindata freezer" + (set -x; cp -r $src/l2chaindata/ancient $dst/l2chaindata/) +else + if $skip_existing; then + echo "== l2chaindata directory already exists, skipping conversion (--skip-existing flag is set)" + else + # unreachable, we already had to remove root directory + exit 1 + fi +fi + +echo + +if ! [ -e $dst/arbitrumdata ]; then + echo "== Converting arbitrumdata db" + (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/arbitrumdata --dst.db-engine="pebble" --dst.data $dst/arbitrumdata --convert --compact) +else + if $skip_existing; then + echo "== arbitrumdata directory already exists, skipping conversion (--skip-existing flag is set)" + else + # unreachable, we already had to remove root directory + exit 1 + fi +fi + +echo +if [ -e $src/wasm ]; then + if ! [ -e $dst/wasm ]; then + echo "== Converting wasm db" + (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/wasm --dst.db-engine="pebble" --dst.data $dst/wasm --convert --compact) + else + if $skip_existing; then + echo "== wasm directory already exists, skipping conversion (--skip-existing flag is set)" + else + # unreachable, we already had to remove root directory + exit 1 + fi + fi +else + echo "== Warning! Source directory does not contain wasm database. That is expected if source database was created with nitro version older then v2.4.0-beta.1" +fi + +echo "== Done." From 8657e51bee6eeadc5c53ccbff5b47991e72ef19d Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 11 Jun 2024 14:28:47 +0200 Subject: [PATCH 26/58] scripts: improve convert-database script --- scripts/convert-databases.bash | 119 +++++++++++++++++++++++++++------ 1 file changed, 98 insertions(+), 21 deletions(-) diff --git a/scripts/convert-databases.bash b/scripts/convert-databases.bash index 841e0ac229..1fcde2a533 100755 --- a/scripts/convert-databases.bash +++ b/scripts/convert-databases.bash @@ -11,6 +11,12 @@ dst= force=false skip_existing=false +l2chaindata_status="unknown" +l2chaindata_ancient_status="unknown" +arbitrumdata_status="unknown" +wasm_status="unknown" +classicmsg_status="unknown" + checkMissingValue () { if [[ $1 -eq 0 || $2 == -* ]]; then echo "missing $3 argument value" @@ -18,6 +24,26 @@ checkMissingValue () { fi } +printStatus() { + echo "== Conversion status:" + echo " l2chaindata database: $l2chaindata_status" + echo " l2chaindata database freezer (ancient): $l2chaindata_ancient_status" + echo " arbitrumdata database: $arbitrumdata_status" + echo " wasm database: $wasm_status" + echo " classic-msg database: $classicmsg_status" +} + +printUsage() { +echo Usage: $0 \[OPTIONS..\] + echo + echo OPTIONS: + echo "--dbconv dbconv binary path (default: \"$DEFAULT_DBCONV\")" + echo "--src directory containing source databases (default: \"$DEFAULT_SRC\")" + echo "--dst destination directory" + echo "--force remove destination directory if it exists" + echo "--skip-existing skip convertion of databases which directories already exist in the destination directory" +} + while [[ $# -gt 0 ]]; do case $1 in --dbconv) @@ -47,14 +73,7 @@ while [[ $# -gt 0 ]]; do shift ;; *) - echo Usage: $0 \[OPTIONS..\] - echo - echo OPTIONS: - echo "--dbconv dbconv binary path (default: \"$DEFAULT_DBCONV\")" - echo "--src root directory containinig source databases (default: \"$DEFAULT_SRC\")" - echo "--dst destination path" - echo "--force remove destination directory if it exists" - echo "--skip-existing skip convertion of databases which directories already exist in the destination directory" + printUsage exit 0 esac done @@ -64,19 +83,37 @@ if ! [ -e "$dbconv" ]; then exit 1 fi +if ! [ -n "$dst" ]; then + echo Error: Missing destination directory \(\-\-dst\) + printUsage + exit 1 +fi + if ! [ -d "$src" ]; then - echo Error: Invalid source path: "$src" is not a directory + echo Error: Invalid source directory: \""$src"\" is missing + exit 1 +fi + +src=$(realpath $src) + +if ! [ -d "$src"/l2chaindata ]; then + echo Error: Invalid source directory: \""$src"/l2chaindata\" is missing exit 1 fi if ! [ -d $src/l2chaindata/ancient ]; then - echo Error: Invalid ancient path: $src/l2chaindata/ancient is not a directory + echo Error: Invalid source directory: \""$src"/l2chaindata/ancient\" is missing + exit 1 +fi + +if ! [ -d "$src"/arbitrumdata ]; then + echo Error: Invalid source directory: missing "$src/arbitrumdata" directory + exit 1 fi -src=$(realpath $src) if [ -e "$dst" ] && ! $skip_existing; then if $force; then - echo == Warning! Destination already exists, --force is set, this will remove all files under the path: "$dst" + echo == Warning! Destination already exists, --force is set, this will remove all files under path: "$dst" read -p "are you sure? [y/n]" -n 1 response echo if [[ $response == "y" ]] || [[ $response == "Y" ]]; then @@ -92,47 +129,87 @@ fi if ! [ -e $dst/l2chaindata ]; then echo "== Converting l2chaindata db" - (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/l2chaindata --dst.db-engine="pebble" --dst.data $dst/l2chaindata --convert --compact) - echo "== Copying l2chaindata freezer" - (set -x; cp -r $src/l2chaindata/ancient $dst/l2chaindata/) + (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/l2chaindata --dst.db-engine="pebble" --dst.data $dst/l2chaindata --convert --compact) || (l2chaindata_status="conversion failed"; printStatus; exit 1) + l2chaindata_status="converted" else if $skip_existing; then echo "== l2chaindata directory already exists, skipping conversion (--skip-existing flag is set)" + l2chaindata_status="skipped" else # unreachable, we already had to remove root directory + echo script error, reached unreachable exit 1 fi fi -echo +if ! [ -e $dst/l2chaindata/ancient ]; then + echo "== Copying l2chaindata ancients" + (set -x; cp -r $src/l2chaindata/ancient $dst/l2chaindata/) || (l2chaindata_ancient_status="failed to copy"; printStatus; exit 1) + l2chaindata_ancient_status="copied" +else + if $skip_existing; then + echo "== l2chaindata/ancient directory already exists, skipping copy (--skip-existing flag is set)" + l2chaindata_ancient_status="skipped" + else + # unreachable, we already had to remove root directory + echo script error, reached unreachable + exit 1 + fi +fi if ! [ -e $dst/arbitrumdata ]; then echo "== Converting arbitrumdata db" - (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/arbitrumdata --dst.db-engine="pebble" --dst.data $dst/arbitrumdata --convert --compact) + (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/arbitrumdata --dst.db-engine="pebble" --dst.data $dst/arbitrumdata --convert --compact) || (arbitrumdata_status="conversion failed"; printStatus; exit 1) + arbitrumdata_status="converted" else if $skip_existing; then echo "== arbitrumdata directory already exists, skipping conversion (--skip-existing flag is set)" + arbitrumdata_status="skipped" else # unreachable, we already had to remove root directory + echo script error, reached unreachable exit 1 fi fi -echo if [ -e $src/wasm ]; then if ! [ -e $dst/wasm ]; then echo "== Converting wasm db" - (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/wasm --dst.db-engine="pebble" --dst.data $dst/wasm --convert --compact) + (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/wasm --dst.db-engine="pebble" --dst.data $dst/wasm --convert --compact) || (wasm_status="conversion failed"; printStatus; exit 1) + wasm_status="converted" else if $skip_existing; then echo "== wasm directory already exists, skipping conversion (--skip-existing flag is set)" + wasm_status="skipped" + else + # unreachable, we already had to remove root directory + echo script error, reached unreachable + exit 1 + fi + fi +else + echo "== Note: Source directory does not contain wasm database." + wasm_status="not found in source directory" +fi + +if [ -e $src/classic-msg ]; then + if ! [ -e $dst/classic-msg ]; then + echo "== Converting classic-msg db" + (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/classic-msg --dst.db-engine="pebble" --dst.data $dst/classic-msg --convert --compact) || (classicmsg_status="conversion failed"; printStatus; exit 1) + classicmsg_status="converted" + else + if $skip_existing; then + echo "== classic-msg directory already exists, skipping conversion (--skip-existing flag is set)" + classicmsg_status="skipped" else # unreachable, we already had to remove root directory + echo script error, reached unreachable exit 1 fi fi else - echo "== Warning! Source directory does not contain wasm database. That is expected if source database was created with nitro version older then v2.4.0-beta.1" + echo "== Note: Source directory does not contain classic-msg database." + classicmsg_status="not found in source directory" fi -echo "== Done." +printStatus From 09d0371170d0764d21870d0bd8cb6957e5ca2a28 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 11 Jun 2024 14:30:10 +0200 Subject: [PATCH 27/58] cmd/dbconv: return 1 on error from main binary --- cmd/dbconv/main.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index bcd41de84f..e4938e9597 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -38,17 +38,17 @@ func main() { config, err := parseDBConv(args) if err != nil { confighelpers.PrintErrorAndExit(err, printSampleUsage) - return + os.Exit(1) } err = genericconf.InitLog(config.LogType, config.LogLevel, &genericconf.FileLoggingConfig{Enable: false}, nil) if err != nil { fmt.Fprintf(os.Stderr, "Error initializing logging: %v\n", err) - return + os.Exit(1) } if err = config.Validate(); err != nil { log.Error("Invalid config", "err", err) - return + os.Exit(1) } if config.Metrics { @@ -79,7 +79,7 @@ func main() { err = conv.Convert(ctx) if err != nil { log.Error("Conversion error", "err", err) - return + os.Exit(1) } stats := conv.Stats() log.Info("Conversion finished.", "entries", stats.Entries(), "MB", stats.Bytes()/1024/1024, "avg Ke/s", stats.AverageEntriesPerSecond()/1000, "avg MB/s", stats.AverageBytesPerSecond()/1024/1024, "elapsed", stats.Elapsed()) @@ -90,7 +90,7 @@ func main() { err = conv.CompactDestination() if err != nil { log.Error("Compaction error", "err", err) - return + os.Exit(1) } } @@ -99,7 +99,7 @@ func main() { err = conv.Verify(ctx) if err != nil { log.Error("Verification error", "err", err) - return + os.Exit(1) } stats := conv.Stats() log.Info("Verification completed successfully.", "elapsed", stats.Elapsed()) From c47ee348fedfae7afe43ab5912881bf5cc8025d4 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 11 Jun 2024 14:35:46 +0200 Subject: [PATCH 28/58] scripts: add --help flag to convert-databases.bash --- scripts/convert-databases.bash | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/convert-databases.bash b/scripts/convert-databases.bash index 1fcde2a533..a1b0c284a2 100755 --- a/scripts/convert-databases.bash +++ b/scripts/convert-databases.bash @@ -72,6 +72,10 @@ while [[ $# -gt 0 ]]; do skip_existing=true shift ;; + --help) + printUsage + exit 0 + ;; *) printUsage exit 0 From 25ab55debc47115212dd2dc238665011da0db3b3 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 11 Jun 2024 15:06:38 +0200 Subject: [PATCH 29/58] scripts: add extra flags check in convert-databases.bash --- scripts/convert-databases.bash | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/convert-databases.bash b/scripts/convert-databases.bash index a1b0c284a2..e2a76e025f 100755 --- a/scripts/convert-databases.bash +++ b/scripts/convert-databases.bash @@ -82,6 +82,12 @@ while [[ $# -gt 0 ]]; do esac done +if $force && $skip_existing; then + echo Error: Cannot use both --force and --skipexisting + printUsage + exit 1 +fi + if ! [ -e "$dbconv" ]; then echo Error: Invalid dbconv binary path: "$dbconv" does not exist exit 1 From 7f9f7efe3deaa4fa77459dbc2040e1fa630bb386 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik <10907694+magicxyyz@users.noreply.github.com> Date: Fri, 21 Jun 2024 13:46:12 +0200 Subject: [PATCH 30/58] Update cmd/dbconv/dbconv/config.go Co-authored-by: Diego Ximenes Mendes --- cmd/dbconv/dbconv/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index 256c8020fb..19c625c0aa 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -28,7 +28,7 @@ var DBConfigDefault = DBConfig{ func DBConfigAddOptions(prefix string, f *flag.FlagSet, defaultNamespace string) { f.String(prefix+".data", DBConfigDefault.Data, "directory of stored chain state") f.String(prefix+".db-engine", DBConfigDefault.DBEngine, "backing database implementation to use ('leveldb' or 'pebble')") - f.Int(prefix+".handles", DBConfigDefault.Handles, "number of file descriptor handles to use for the database") + f.Int(prefix+".handles", DBConfigDefault.Handles, "number of files to be open simultaneously") f.Int(prefix+".cache", DBConfigDefault.Cache, "the capacity(in megabytes) of the data caching") f.String(prefix+".namespace", defaultNamespace, "metrics namespace") conf.PebbleConfigAddOptions(prefix+".pebble", f) From cfb139362ddd691fa0c323fe1e328c700b49aee5 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Mon, 17 Jun 2024 23:37:00 +0200 Subject: [PATCH 31/58] dbconv: address review comments * move config validation to parseDBConv * use DBConverter.Verify in test * check for error when searching for unexpected keys * fix typo, remove forks number from dbconv stats * add log when conversion starts * retab convert-databases.bash * move progress printing to func * cleanup db conversion system test * change verify config type to string * remove src and dst fields from DBConverter * add ideal-batch-size validation --- cmd/dbconv/dbconv/config.go | 15 +- cmd/dbconv/dbconv/dbconv.go | 73 ++++---- cmd/dbconv/dbconv/dbconv_test.go | 39 ++--- cmd/dbconv/dbconv/stats.go | 26 +-- cmd/dbconv/main.go | 25 +-- scripts/convert-databases.bash | 264 ++++++++++++++--------------- system_tests/db_conversion_test.go | 29 ++-- 7 files changed, 226 insertions(+), 245 deletions(-) diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index 19c625c0aa..3d938fdfd1 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -40,7 +40,7 @@ type DBConvConfig struct { IdealBatchSize int `koanf:"ideal-batch-size"` Convert bool `koanf:"convert"` Compact bool `koanf:"compact"` - Verify int `koanf:"verify"` + Verify string `koanf:"verify"` LogLevel string `koanf:"log-level"` LogType string `koanf:"log-type"` Metrics bool `koanf:"metrics"` @@ -51,7 +51,7 @@ var DefaultDBConvConfig = DBConvConfig{ IdealBatchSize: 100 * 1024 * 1024, // 100 MB Convert: false, Compact: false, - Verify: 0, + Verify: "", LogLevel: "INFO", LogType: "plaintext", Metrics: false, @@ -64,7 +64,7 @@ func DBConvConfigAddOptions(f *flag.FlagSet) { f.Int("ideal-batch-size", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size") f.Bool("convert", DefaultDBConvConfig.Convert, "enables conversion step") f.Bool("compact", DefaultDBConvConfig.Compact, "enables compaction step") - f.Int("verify", DefaultDBConvConfig.Verify, "enables verification step (0 = disabled, 1 = only keys, 2 = keys and values)") + f.String("verify", DefaultDBConvConfig.Verify, "enables verification step (\"\" = disabled, \"keys\" = only keys, \"full\" = keys and values)") f.String("log-level", DefaultDBConvConfig.LogLevel, "log level, valid values are CRIT, ERROR, WARN, INFO, DEBUG, TRACE") f.String("log-type", DefaultDBConvConfig.LogType, "log type (plaintext or json)") f.Bool("metrics", DefaultDBConvConfig.Metrics, "enable metrics") @@ -72,11 +72,14 @@ func DBConvConfigAddOptions(f *flag.FlagSet) { } func (c *DBConvConfig) Validate() error { - if c.Verify < 0 || c.Verify > 2 { - return fmt.Errorf("Invalid verify config value: %v", c.Verify) + if c.Verify != "keys" && c.Verify != "full" && c.Verify != "" { + return fmt.Errorf("Invalid verify mode: %v", c.Verify) } - if !c.Convert && c.Verify == 0 && !c.Compact { + if !c.Convert && c.Verify == "" && !c.Compact { return errors.New("nothing to be done, conversion, verification and compaction disabled") } + if c.IdealBatchSize <= 0 { + return fmt.Errorf("Invalid ideal batch size: %d, has to be greater then 0", c.IdealBatchSize) + } return nil } diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index e51f4d2e20..a98c853c50 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -14,9 +14,6 @@ import ( type DBConverter struct { config *DBConvConfig stats Stats - - src ethdb.Database - dst ethdb.Database } func NewDBConverter(config *DBConvConfig) *DBConverter { @@ -27,9 +24,11 @@ func NewDBConverter(config *DBConvConfig) *DBConverter { func openDB(config *DBConfig, name string, readonly bool) (ethdb.Database, error) { return rawdb.Open(rawdb.OpenOptions{ - Type: config.DBEngine, - Directory: config.Data, - AncientsDirectory: "", // don't open freezer + Type: config.DBEngine, + Directory: config.Data, + // we don't open freezer, it doesn't need to be converted as it has format independent of db-engine + // note: user needs to handle copying/moving the ancient directory + AncientsDirectory: "", Namespace: config.Namespace, Cache: config.Cache, Handles: config.Handles, @@ -40,19 +39,21 @@ func openDB(config *DBConfig, name string, readonly bool) (ethdb.Database, error func (c *DBConverter) Convert(ctx context.Context) error { var err error - defer c.Close() - c.src, err = openDB(&c.config.Src, "src", true) + src, err := openDB(&c.config.Src, "src", true) if err != nil { return err } - c.dst, err = openDB(&c.config.Dst, "dst", false) + defer src.Close() + dst, err := openDB(&c.config.Dst, "dst", false) if err != nil { return err } + defer dst.Close() c.stats.Reset() - it := c.src.NewIterator(nil, nil) + log.Info("Converting database", "src", c.config.Src.Data, "dst", c.config.Dst.Data, "db-engine", c.config.Dst.DBEngine) + it := src.NewIterator(nil, nil) defer it.Release() - batch := c.dst.NewBatch() + batch := dst.NewBatch() entriesInBatch := 0 for it.Next() && ctx.Err() == nil { if err = batch.Put(it.Key(), it.Value()); err != nil { @@ -71,7 +72,9 @@ func (c *DBConverter) Convert(ctx context.Context) error { } if err = ctx.Err(); err == nil { batchSize := batch.ValueSize() - err = batch.Write() + if err = batch.Write(); err != nil { + return err + } c.stats.AddEntries(int64(entriesInBatch)) c.stats.AddBytes(int64(batchSize)) } @@ -79,15 +82,14 @@ func (c *DBConverter) Convert(ctx context.Context) error { } func (c *DBConverter) CompactDestination() error { - var err error - c.dst, err = openDB(&c.config.Dst, "dst", false) + dst, err := openDB(&c.config.Dst, "dst", false) if err != nil { return err } - defer c.dst.Close() + defer dst.Close() start := time.Now() - log.Info("Compacting destination database", "data", c.config.Dst.Data) - if err := c.dst.Compact(nil, nil); err != nil { + log.Info("Compacting destination database", "dst", c.config.Dst.Data) + if err := dst.Compact(nil, nil); err != nil { return err } log.Info("Compaction done", "elapsed", time.Since(start)) @@ -95,34 +97,40 @@ func (c *DBConverter) CompactDestination() error { } func (c *DBConverter) Verify(ctx context.Context) error { - if c.config.Verify == 1 { + if c.config.Verify == "keys" { log.Info("Starting quick verification - verifying only keys existence") - } else { + } else if c.config.Verify == "full" { log.Info("Starting full verification - verifying keys and values") } var err error - defer c.Close() - c.src, err = openDB(&c.config.Src, "src", true) + src, err := openDB(&c.config.Src, "src", true) if err != nil { return err } - c.dst, err = openDB(&c.config.Dst, "dst", true) + defer src.Close() + + dst, err := openDB(&c.config.Dst, "dst", true) if err != nil { return err } + defer dst.Close() c.stats.Reset() - it := c.src.NewIterator(nil, nil) + it := src.NewIterator(nil, nil) defer it.Release() for it.Next() && ctx.Err() == nil { switch c.config.Verify { - case 1: - if has, err := c.dst.Has(it.Key()); !has { - return fmt.Errorf("Missing key in destination db, key: %v, err: %w", it.Key(), err) + case "keys": + has, err := dst.Has(it.Key()) + if err != nil { + return fmt.Errorf("Failed to check key existence in destination db, key: %v, err: %w", it.Key(), err) + } + if !has { + return fmt.Errorf("Missing key in destination db, key: %v", it.Key()) } c.stats.AddBytes(int64(len(it.Key()))) - case 2: - dstValue, err := c.dst.Get(it.Key()) + case "full": + dstValue, err := dst.Get(it.Key()) if err != nil { return err } @@ -141,12 +149,3 @@ func (c *DBConverter) Verify(ctx context.Context) error { func (c *DBConverter) Stats() *Stats { return &c.stats } - -func (c *DBConverter) Close() { - if c.src != nil { - c.src.Close() - } - if c.dst != nil { - c.dst.Close() - } -} diff --git a/cmd/dbconv/dbconv/dbconv_test.go b/cmd/dbconv/dbconv/dbconv_test.go index a0aca86330..16d42269f7 100644 --- a/cmd/dbconv/dbconv/dbconv_test.go +++ b/cmd/dbconv/dbconv/dbconv_test.go @@ -1,7 +1,6 @@ package dbconv import ( - "bytes" "context" "testing" @@ -35,43 +34,33 @@ func TestConversion(t *testing.T) { config.Src = oldDBConfig config.Dst = newDBConfig config.IdealBatchSize = 5 + config.Verify = "full" conv := NewDBConverter(&config) ctx, cancel := context.WithCancel(context.Background()) defer cancel() + err := conv.Convert(ctx) Require(t, err) - conv.Close() + err = conv.Verify(ctx) + Require(t, err) + + // check if new database doesn't have any extra keys oldDb, err := openDB(&oldDBConfig, "", true) Require(t, err) defer oldDb.Close() newDb, err := openDB(&newDBConfig, "", true) Require(t, err) defer newDb.Close() - - func() { - it := oldDb.NewIterator(nil, nil) - defer it.Release() - for it.Next() { - if has, _ := newDb.Has(it.Key()); !has { - t.Log("Missing key in the converted db, key:", it.Key()) - } - newValue, err := newDb.Get(it.Key()) - Require(t, err) - if !bytes.Equal(newValue, it.Value()) { - Fail(t, "Value mismatch, old:", it.Value(), "new:", newValue) - } - } - }() - func() { - it := newDb.NewIterator(nil, nil) - defer it.Release() - for it.Next() { - if has, _ := oldDb.Has(it.Key()); !has { - Fail(t, "Unexpected key in the converted db, key:", it.Key()) - } + it := newDb.NewIterator(nil, nil) + defer it.Release() + for it.Next() { + has, err := oldDb.Has(it.Key()) + Require(t, err) + if !has { + Fail(t, "Unexpected key in the converted db, key:", it.Key()) } - }() + } } func Require(t *testing.T, err error, printables ...interface{}) { diff --git a/cmd/dbconv/dbconv/stats.go b/cmd/dbconv/dbconv/stats.go index b56289d06c..91fc2529d6 100644 --- a/cmd/dbconv/dbconv/stats.go +++ b/cmd/dbconv/dbconv/stats.go @@ -8,12 +8,11 @@ import ( type Stats struct { entries atomic.Int64 bytes atomic.Int64 - forks atomic.Int64 startTimestamp int64 - prevEntires int64 + prevEntries int64 prevBytes int64 - prevEntiresTimestamp int64 + prevEntriesTimestamp int64 prevBytesTimestamp int64 } @@ -21,11 +20,10 @@ func (s *Stats) Reset() { now := time.Now().UnixNano() s.entries.Store(0) s.bytes.Store(0) - s.forks.Store(0) s.startTimestamp = now - s.prevEntires = 0 + s.prevEntries = 0 s.prevBytes = 0 - s.prevEntiresTimestamp = now + s.prevEntriesTimestamp = now s.prevBytesTimestamp = now } @@ -45,14 +43,6 @@ func (s *Stats) Bytes() int64 { return s.bytes.Load() } -func (s *Stats) AddFork() { - s.forks.Add(1) -} - -func (s *Stats) Forks() int64 { - return s.forks.Load() -} - func (s *Stats) Elapsed() time.Duration { now := time.Now().UnixNano() dt := now - s.startTimestamp @@ -63,13 +53,13 @@ func (s *Stats) Elapsed() time.Duration { func (s *Stats) EntriesPerSecond() float64 { now := time.Now().UnixNano() current := s.Entries() - dt := now - s.prevEntiresTimestamp + dt := now - s.prevEntriesTimestamp if dt == 0 { dt = 1 } - de := current - s.prevEntires - s.prevEntires = current - s.prevEntiresTimestamp = now + de := current - s.prevEntries + s.prevEntries = current + s.prevEntriesTimestamp = now return float64(de) * 1e9 / float64(dt) } diff --git a/cmd/dbconv/main.go b/cmd/dbconv/main.go index e4938e9597..c0b5c8f8e4 100644 --- a/cmd/dbconv/main.go +++ b/cmd/dbconv/main.go @@ -26,31 +26,36 @@ func parseDBConv(args []string) (*dbconv.DBConvConfig, error) { if err := confighelpers.EndCommonParse(k, &config); err != nil { return nil, err } - return &config, nil + return &config, config.Validate() } func printSampleUsage(name string) { fmt.Printf("Sample usage: %s --help \n\n", name) } +func printProgress(conv *dbconv.DBConverter) { + stats := conv.Stats() + fmt.Printf("Progress:\n") + fmt.Printf("\tprocessed entries: %d\n", stats.Entries()) + fmt.Printf("\tprocessed data (MB): %d\n", stats.Bytes()/1024/1024) + fmt.Printf("\telapsed:\t%v\n", stats.Elapsed()) + fmt.Printf("\tcurrent:\t%.3e entries/s\t%.3f MB/s\n", stats.EntriesPerSecond()/1000, stats.BytesPerSecond()/1024/1024) + fmt.Printf("\taverage:\t%.3e entries/s\t%.3f MB/s\n", stats.AverageEntriesPerSecond()/1000, stats.AverageBytesPerSecond()/1024/1024) +} + func main() { args := os.Args[1:] config, err := parseDBConv(args) if err != nil { confighelpers.PrintErrorAndExit(err, printSampleUsage) - os.Exit(1) } + err = genericconf.InitLog(config.LogType, config.LogLevel, &genericconf.FileLoggingConfig{Enable: false}, nil) if err != nil { fmt.Fprintf(os.Stderr, "Error initializing logging: %v\n", err) os.Exit(1) } - if err = config.Validate(); err != nil { - log.Error("Invalid config", "err", err) - os.Exit(1) - } - if config.Metrics { go metrics.CollectProcessMetrics(config.MetricsServer.UpdateInterval) exp.Setup(fmt.Sprintf("%v:%v", config.MetricsServer.Addr, config.MetricsServer.Port)) @@ -66,9 +71,7 @@ func main() { for { select { case <-ticker.C: - stats := conv.Stats() - fmt.Printf("Progress:\n\tprocessed entries: %d\n\tprocessed data (MB): %d\n\telapsed: %v\n\tcurrent:\tKe/s: %.3f\tMB/s: %.3f\n\taverage:\tKe/s: %.3f\tMB/s: %.3f\n", stats.Entries(), stats.Bytes()/1024/1024, stats.Elapsed(), stats.EntriesPerSecond()/1000, stats.BytesPerSecond()/1024/1024, stats.AverageEntriesPerSecond()/1000, stats.AverageBytesPerSecond()/1024/1024) - + printProgress(conv) case <-ctx.Done(): return } @@ -94,7 +97,7 @@ func main() { } } - if config.Verify > 0 { + if config.Verify != "" { ticker.Reset(10 * time.Second) err = conv.Verify(ctx) if err != nil { diff --git a/scripts/convert-databases.bash b/scripts/convert-databases.bash index e2a76e025f..5929f186a7 100755 --- a/scripts/convert-databases.bash +++ b/scripts/convert-databases.bash @@ -19,207 +19,207 @@ classicmsg_status="unknown" checkMissingValue () { if [[ $1 -eq 0 || $2 == -* ]]; then - echo "missing $3 argument value" - exit 1 - fi + echo "missing $3 argument value" + exit 1 + fi } printStatus() { - echo "== Conversion status:" - echo " l2chaindata database: $l2chaindata_status" - echo " l2chaindata database freezer (ancient): $l2chaindata_ancient_status" - echo " arbitrumdata database: $arbitrumdata_status" - echo " wasm database: $wasm_status" - echo " classic-msg database: $classicmsg_status" + echo "== Conversion status:" + echo " l2chaindata database: $l2chaindata_status" + echo " l2chaindata database freezer (ancient): $l2chaindata_ancient_status" + echo " arbitrumdata database: $arbitrumdata_status" + echo " wasm database: $wasm_status" + echo " classic-msg database: $classicmsg_status" } printUsage() { echo Usage: $0 \[OPTIONS..\] - echo - echo OPTIONS: - echo "--dbconv dbconv binary path (default: \"$DEFAULT_DBCONV\")" - echo "--src directory containing source databases (default: \"$DEFAULT_SRC\")" - echo "--dst destination directory" - echo "--force remove destination directory if it exists" - echo "--skip-existing skip convertion of databases which directories already exist in the destination directory" + echo + echo OPTIONS: + echo "--dbconv dbconv binary path (default: \"$DEFAULT_DBCONV\")" + echo "--src directory containing source databases (default: \"$DEFAULT_SRC\")" + echo "--dst destination directory" + echo "--force remove destination directory if it exists" + echo "--skip-existing skip convertion of databases which directories already exist in the destination directory" } while [[ $# -gt 0 ]]; do case $1 in --dbconv) shift - checkMissingValue $# "$1" "--dbconv" - dbconv=$1 + checkMissingValue $# "$1" "--dbconv" + dbconv=$1 shift ;; - --src) + --src) shift - checkMissingValue $# "$1" "--src" - src=$1 + checkMissingValue $# "$1" "--src" + src=$1 shift - ;; - --dst) + ;; + --dst) + shift + checkMissingValue $# "$1" "--dst" + dst=$1 + shift + ;; + --force) + force=true shift - checkMissingValue $# "$1" "--dst" - dst=$1 + ;; + --skip-existing) + skip_existing=true shift - ;; - --force) - force=true - shift - ;; - --skip-existing) - skip_existing=true - shift - ;; - --help) - printUsage + ;; + --help) + printUsage exit 0 - ;; + ;; *) - printUsage + printUsage exit 0 esac done if $force && $skip_existing; then - echo Error: Cannot use both --force and --skipexisting - printUsage - exit 1 + echo Error: Cannot use both --force and --skipexisting + printUsage + exit 1 fi if ! [ -e "$dbconv" ]; then - echo Error: Invalid dbconv binary path: "$dbconv" does not exist - exit 1 + echo Error: Invalid dbconv binary path: "$dbconv" does not exist + exit 1 fi if ! [ -n "$dst" ]; then - echo Error: Missing destination directory \(\-\-dst\) - printUsage - exit 1 + echo Error: Missing destination directory \(\-\-dst\) + printUsage + exit 1 fi if ! [ -d "$src" ]; then - echo Error: Invalid source directory: \""$src"\" is missing - exit 1 + echo Error: Invalid source directory: \""$src"\" is missing + exit 1 fi src=$(realpath $src) if ! [ -d "$src"/l2chaindata ]; then - echo Error: Invalid source directory: \""$src"/l2chaindata\" is missing - exit 1 + echo Error: Invalid source directory: \""$src"/l2chaindata\" is missing + exit 1 fi if ! [ -d $src/l2chaindata/ancient ]; then - echo Error: Invalid source directory: \""$src"/l2chaindata/ancient\" is missing - exit 1 + echo Error: Invalid source directory: \""$src"/l2chaindata/ancient\" is missing + exit 1 fi if ! [ -d "$src"/arbitrumdata ]; then - echo Error: Invalid source directory: missing "$src/arbitrumdata" directory - exit 1 + echo Error: Invalid source directory: missing "$src/arbitrumdata" directory + exit 1 fi if [ -e "$dst" ] && ! $skip_existing; then - if $force; then - echo == Warning! Destination already exists, --force is set, this will remove all files under path: "$dst" - read -p "are you sure? [y/n]" -n 1 response - echo - if [[ $response == "y" ]] || [[ $response == "Y" ]]; then - (set -x; rm -r "$dst" || exit 1) - else - exit 0 - fi - else - echo Error: invalid destination path: "$dst" already exists - exit 1 - fi + if $force; then + echo == Warning! Destination already exists, --force is set, this will remove all files under path: "$dst" + read -p "are you sure? [y/n]" -n 1 response + echo + if [[ $response == "y" ]] || [[ $response == "Y" ]]; then + (set -x; rm -r "$dst" || exit 1) + else + exit 0 + fi + else + echo Error: invalid destination path: "$dst" already exists + exit 1 + fi fi if ! [ -e $dst/l2chaindata ]; then - echo "== Converting l2chaindata db" - (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/l2chaindata --dst.db-engine="pebble" --dst.data $dst/l2chaindata --convert --compact) || (l2chaindata_status="conversion failed"; printStatus; exit 1) - l2chaindata_status="converted" + echo "== Converting l2chaindata db" + (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/l2chaindata --dst.db-engine="pebble" --dst.data $dst/l2chaindata --convert --compact) || (l2chaindata_status="conversion failed"; printStatus; exit 1) + l2chaindata_status="converted" else - if $skip_existing; then - echo "== l2chaindata directory already exists, skipping conversion (--skip-existing flag is set)" - l2chaindata_status="skipped" - else - # unreachable, we already had to remove root directory - echo script error, reached unreachable - exit 1 - fi + if $skip_existing; then + echo "== l2chaindata directory already exists, skipping conversion (--skip-existing flag is set)" + l2chaindata_status="skipped" + else + # unreachable, we already had to remove root directory + echo script error, reached unreachable + exit 1 + fi fi if ! [ -e $dst/l2chaindata/ancient ]; then - echo "== Copying l2chaindata ancients" - (set -x; cp -r $src/l2chaindata/ancient $dst/l2chaindata/) || (l2chaindata_ancient_status="failed to copy"; printStatus; exit 1) - l2chaindata_ancient_status="copied" + echo "== Copying l2chaindata ancients" + (set -x; cp -r $src/l2chaindata/ancient $dst/l2chaindata/) || (l2chaindata_ancient_status="failed to copy"; printStatus; exit 1) + l2chaindata_ancient_status="copied" else - if $skip_existing; then - echo "== l2chaindata/ancient directory already exists, skipping copy (--skip-existing flag is set)" - l2chaindata_ancient_status="skipped" - else - # unreachable, we already had to remove root directory - echo script error, reached unreachable - exit 1 - fi + if $skip_existing; then + echo "== l2chaindata/ancient directory already exists, skipping copy (--skip-existing flag is set)" + l2chaindata_ancient_status="skipped" + else + # unreachable, we already had to remove root directory + echo script error, reached unreachable + exit 1 + fi fi if ! [ -e $dst/arbitrumdata ]; then - echo "== Converting arbitrumdata db" - (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/arbitrumdata --dst.db-engine="pebble" --dst.data $dst/arbitrumdata --convert --compact) || (arbitrumdata_status="conversion failed"; printStatus; exit 1) - arbitrumdata_status="converted" + echo "== Converting arbitrumdata db" + (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/arbitrumdata --dst.db-engine="pebble" --dst.data $dst/arbitrumdata --convert --compact) || (arbitrumdata_status="conversion failed"; printStatus; exit 1) + arbitrumdata_status="converted" else - if $skip_existing; then - echo "== arbitrumdata directory already exists, skipping conversion (--skip-existing flag is set)" - arbitrumdata_status="skipped" - else - # unreachable, we already had to remove root directory - echo script error, reached unreachable - exit 1 - fi + if $skip_existing; then + echo "== arbitrumdata directory already exists, skipping conversion (--skip-existing flag is set)" + arbitrumdata_status="skipped" + else + # unreachable, we already had to remove root directory + echo script error, reached unreachable + exit 1 + fi fi if [ -e $src/wasm ]; then - if ! [ -e $dst/wasm ]; then - echo "== Converting wasm db" - (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/wasm --dst.db-engine="pebble" --dst.data $dst/wasm --convert --compact) || (wasm_status="conversion failed"; printStatus; exit 1) - wasm_status="converted" - else - if $skip_existing; then - echo "== wasm directory already exists, skipping conversion (--skip-existing flag is set)" - wasm_status="skipped" - else - # unreachable, we already had to remove root directory - echo script error, reached unreachable - exit 1 - fi - fi + if ! [ -e $dst/wasm ]; then + echo "== Converting wasm db" + (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/wasm --dst.db-engine="pebble" --dst.data $dst/wasm --convert --compact) || (wasm_status="conversion failed"; printStatus; exit 1) + wasm_status="converted" + else + if $skip_existing; then + echo "== wasm directory already exists, skipping conversion (--skip-existing flag is set)" + wasm_status="skipped" + else + # unreachable, we already had to remove root directory + echo script error, reached unreachable + exit 1 + fi + fi else - echo "== Note: Source directory does not contain wasm database." - wasm_status="not found in source directory" + echo "== Note: Source directory does not contain wasm database." + wasm_status="not found in source directory" fi if [ -e $src/classic-msg ]; then - if ! [ -e $dst/classic-msg ]; then - echo "== Converting classic-msg db" - (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/classic-msg --dst.db-engine="pebble" --dst.data $dst/classic-msg --convert --compact) || (classicmsg_status="conversion failed"; printStatus; exit 1) - classicmsg_status="converted" - else - if $skip_existing; then - echo "== classic-msg directory already exists, skipping conversion (--skip-existing flag is set)" - classicmsg_status="skipped" - else - # unreachable, we already had to remove root directory - echo script error, reached unreachable - exit 1 - fi - fi + if ! [ -e $dst/classic-msg ]; then + echo "== Converting classic-msg db" + (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/classic-msg --dst.db-engine="pebble" --dst.data $dst/classic-msg --convert --compact) || (classicmsg_status="conversion failed"; printStatus; exit 1) + classicmsg_status="converted" + else + if $skip_existing; then + echo "== classic-msg directory already exists, skipping conversion (--skip-existing flag is set)" + classicmsg_status="skipped" + else + # unreachable, we already had to remove root directory + echo script error, reached unreachable + exit 1 + fi + fi else - echo "== Note: Source directory does not contain classic-msg database." - classicmsg_status="not found in source directory" + echo "== Note: Source directory does not contain classic-msg database." + classicmsg_status="not found in source directory" fi printStatus diff --git a/system_tests/db_conversion_test.go b/system_tests/db_conversion_test.go index 5a43472031..5f1619e86c 100644 --- a/system_tests/db_conversion_test.go +++ b/system_tests/db_conversion_test.go @@ -44,7 +44,6 @@ func TestDatabaseConversion(t *testing.T) { } l2CleanupDone = true builder.L2.cleanup() - bc := builder.L2.ExecNode.Backend.ArbInterface().BlockChain() t.Log("stopped first node") instanceDir := filepath.Join(dataDir, builder.l2StackConfig.Name) @@ -52,20 +51,18 @@ func TestDatabaseConversion(t *testing.T) { err := os.Rename(filepath.Join(instanceDir, dbname), filepath.Join(instanceDir, fmt.Sprintf("%s_old", dbname))) Require(t, err) t.Log("converting:", dbname) - func() { - oldDBConfig := dbconv.DBConfigDefault - oldDBConfig.Data = path.Join(instanceDir, fmt.Sprintf("%s_old", dbname)) - oldDBConfig.DBEngine = "leveldb" - newDBConfig := dbconv.DBConfigDefault - newDBConfig.Data = path.Join(instanceDir, dbname) - newDBConfig.DBEngine = "pebble" - convConfig := dbconv.DefaultDBConvConfig - convConfig.Src = oldDBConfig - convConfig.Dst = newDBConfig - conv := dbconv.NewDBConverter(&convConfig) - err := conv.Convert(ctx) - Require(t, err) - }() + oldDBConfig := dbconv.DBConfigDefault + oldDBConfig.Data = path.Join(instanceDir, fmt.Sprintf("%s_old", dbname)) + oldDBConfig.DBEngine = "leveldb" + newDBConfig := dbconv.DBConfigDefault + newDBConfig.Data = path.Join(instanceDir, dbname) + newDBConfig.DBEngine = "pebble" + convConfig := dbconv.DefaultDBConvConfig + convConfig.Src = oldDBConfig + convConfig.Dst = newDBConfig + conv := dbconv.NewDBConverter(&convConfig) + err = conv.Convert(ctx) + Require(t, err) } builder.l2StackConfig.DBEngine = "pebble" @@ -79,7 +76,7 @@ func TestDatabaseConversion(t *testing.T) { _, err = testClient.EnsureTxSucceeded(tx) Require(t, err) - bc = testClient.ExecNode.Backend.ArbInterface().BlockChain() + bc := testClient.ExecNode.Backend.ArbInterface().BlockChain() current := bc.CurrentBlock() if current == nil { Fatal(t, "failed to get current block header") From 6a22f1ddb70803385bad49852036ef07404164a5 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 25 Jun 2024 16:36:28 +0200 Subject: [PATCH 32/58] refactor convert-databases script --- scripts/convert-databases.bash | 132 +++++++++++++++++---------------- 1 file changed, 69 insertions(+), 63 deletions(-) diff --git a/scripts/convert-databases.bash b/scripts/convert-databases.bash index 5929f186a7..56044eb04c 100755 --- a/scripts/convert-databases.bash +++ b/scripts/convert-databases.bash @@ -1,7 +1,5 @@ #!/usr/bin/env bash -set -e - DEFAULT_DBCONV=/usr/local/bin/dbconv DEFAULT_SRC=/home/user/.arbitrum/arb1/nitro @@ -11,11 +9,11 @@ dst= force=false skip_existing=false -l2chaindata_status="unknown" -l2chaindata_ancient_status="unknown" -arbitrumdata_status="unknown" -wasm_status="unknown" -classicmsg_status="unknown" +l2chaindata_status="n/a" +l2chaindata_ancient_status="n/a" +arbitrumdata_status="n/a" +wasm_status="n/a" +classicmsg_status="n/a" checkMissingValue () { if [[ $1 -eq 0 || $2 == -* ]]; then @@ -137,28 +135,57 @@ if [ -e "$dst" ] && ! $skip_existing; then fi fi -if ! [ -e $dst/l2chaindata ]; then - echo "== Converting l2chaindata db" - (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/l2chaindata --dst.db-engine="pebble" --dst.data $dst/l2chaindata --convert --compact) || (l2chaindata_status="conversion failed"; printStatus; exit 1) - l2chaindata_status="converted" -else - if $skip_existing; then - echo "== l2chaindata directory already exists, skipping conversion (--skip-existing flag is set)" - l2chaindata_status="skipped" - else - # unreachable, we already had to remove root directory - echo script error, reached unreachable - exit 1 - fi +convert_result= +convert () { + srcdir=$(echo $src/$1 | tr -s /) + dstdir=$(echo $dst/$1 | tr -s /) + if ! [ -e $dstdir ]; then + echo "== Converting $1 db" + cmd="$dbconv --src.db-engine=leveldb --src.data $srcdir --dst.db-engine=pebble --dst.data $dstdir --convert --compact" + echo $cmd + $cmd + if [ $? -ne 0 ]; then + convert_result="FAILED" + return 1 + fi + convert_result="converted" + return 0 + else + if $skip_existing; then + echo "== Note: $dstdir directory already exists, skipping conversion (--skip-existing flag is set)" + convert_result="skipped" + return 0 + else + convert_result="FAILED ($dstdir already exists)" + return 1 + fi + fi +} + +convert "l2chaindata" +res=$? +l2chaindata_status=$convert_result +if [ $res -ne 0 ]; then + printStatus + exit 1 fi if ! [ -e $dst/l2chaindata/ancient ]; then + ancient_src=$(echo $src/l2chaindata/ancient | tr -s /) + ancient_dst=$(echo $dst/l2chaindata/ | tr -s /) echo "== Copying l2chaindata ancients" - (set -x; cp -r $src/l2chaindata/ancient $dst/l2chaindata/) || (l2chaindata_ancient_status="failed to copy"; printStatus; exit 1) + cmd="cp -r $ancient_src $ancient_dst" + echo $cmd + $cmd + if [ $? -ne 0 ]; then + l2chaindata_ancient_status="FAILED (failed to copy)" + printStatus + exit 1 + fi l2chaindata_ancient_status="copied" else if $skip_existing; then - echo "== l2chaindata/ancient directory already exists, skipping copy (--skip-existing flag is set)" + echo "== Note: l2chaindata/ancient directory already exists, skipping copy (--skip-existing flag is set)" l2chaindata_ancient_status="skipped" else # unreachable, we already had to remove root directory @@ -167,56 +194,35 @@ else fi fi -if ! [ -e $dst/arbitrumdata ]; then - echo "== Converting arbitrumdata db" - (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/arbitrumdata --dst.db-engine="pebble" --dst.data $dst/arbitrumdata --convert --compact) || (arbitrumdata_status="conversion failed"; printStatus; exit 1) - arbitrumdata_status="converted" -else - if $skip_existing; then - echo "== arbitrumdata directory already exists, skipping conversion (--skip-existing flag is set)" - arbitrumdata_status="skipped" - else - # unreachable, we already had to remove root directory - echo script error, reached unreachable - exit 1 - fi +convert "arbitrumdata" +res=$? +arbitrumdata_status=$convert_result +if [ $res -ne 0 ]; then + printStatus + exit 1 fi if [ -e $src/wasm ]; then - if ! [ -e $dst/wasm ]; then - echo "== Converting wasm db" - (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/wasm --dst.db-engine="pebble" --dst.data $dst/wasm --convert --compact) || (wasm_status="conversion failed"; printStatus; exit 1) - wasm_status="converted" - else - if $skip_existing; then - echo "== wasm directory already exists, skipping conversion (--skip-existing flag is set)" - wasm_status="skipped" - else - # unreachable, we already had to remove root directory - echo script error, reached unreachable - exit 1 - fi - fi + convert "wasm" + res=$? + wasm_status=$convert_result + if [ $res -ne 0 ]; then + printStatus + exit 1 + fi else echo "== Note: Source directory does not contain wasm database." wasm_status="not found in source directory" fi if [ -e $src/classic-msg ]; then - if ! [ -e $dst/classic-msg ]; then - echo "== Converting classic-msg db" - (set -x; $dbconv --src.db-engine="leveldb" --src.data $src/classic-msg --dst.db-engine="pebble" --dst.data $dst/classic-msg --convert --compact) || (classicmsg_status="conversion failed"; printStatus; exit 1) - classicmsg_status="converted" - else - if $skip_existing; then - echo "== classic-msg directory already exists, skipping conversion (--skip-existing flag is set)" - classicmsg_status="skipped" - else - # unreachable, we already had to remove root directory - echo script error, reached unreachable - exit 1 - fi - fi + convert "classic-msg" + res=$? + classicmsg_status=$convert_result + if [ $res -ne 0 ]; then + printStatus + exit 1 + fi else echo "== Note: Source directory does not contain classic-msg database." classicmsg_status="not found in source directory" From dc24202d56e05a958391f03a92bca7d5c6a89ef2 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 25 Jun 2024 16:47:09 +0200 Subject: [PATCH 33/58] retab convert-databases script --- scripts/convert-databases.bash | 102 ++++++++++++++++----------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/scripts/convert-databases.bash b/scripts/convert-databases.bash index 56044eb04c..c1803b5d98 100755 --- a/scripts/convert-databases.bash +++ b/scripts/convert-databases.bash @@ -137,51 +137,51 @@ fi convert_result= convert () { - srcdir=$(echo $src/$1 | tr -s /) - dstdir=$(echo $dst/$1 | tr -s /) - if ! [ -e $dstdir ]; then - echo "== Converting $1 db" - cmd="$dbconv --src.db-engine=leveldb --src.data $srcdir --dst.db-engine=pebble --dst.data $dstdir --convert --compact" - echo $cmd - $cmd - if [ $? -ne 0 ]; then - convert_result="FAILED" - return 1 - fi - convert_result="converted" - return 0 - else - if $skip_existing; then - echo "== Note: $dstdir directory already exists, skipping conversion (--skip-existing flag is set)" - convert_result="skipped" - return 0 - else - convert_result="FAILED ($dstdir already exists)" - return 1 - fi - fi + srcdir=$(echo $src/$1 | tr -s /) + dstdir=$(echo $dst/$1 | tr -s /) + if ! [ -e $dstdir ]; then + echo "== Converting $1 db" + cmd="$dbconv --src.db-engine=leveldb --src.data $srcdir --dst.db-engine=pebble --dst.data $dstdir --convert --compact" + echo $cmd + $cmd + if [ $? -ne 0 ]; then + convert_result="FAILED" + return 1 + fi + convert_result="converted" + return 0 + else + if $skip_existing; then + echo "== Note: $dstdir directory already exists, skipping conversion (--skip-existing flag is set)" + convert_result="skipped" + return 0 + else + convert_result="FAILED ($dstdir already exists)" + return 1 + fi + fi } convert "l2chaindata" res=$? l2chaindata_status=$convert_result if [ $res -ne 0 ]; then - printStatus - exit 1 + printStatus + exit 1 fi if ! [ -e $dst/l2chaindata/ancient ]; then - ancient_src=$(echo $src/l2chaindata/ancient | tr -s /) - ancient_dst=$(echo $dst/l2chaindata/ | tr -s /) + ancient_src=$(echo $src/l2chaindata/ancient | tr -s /) + ancient_dst=$(echo $dst/l2chaindata/ | tr -s /) echo "== Copying l2chaindata ancients" - cmd="cp -r $ancient_src $ancient_dst" - echo $cmd - $cmd - if [ $? -ne 0 ]; then - l2chaindata_ancient_status="FAILED (failed to copy)" - printStatus - exit 1 - fi + cmd="cp -r $ancient_src $ancient_dst" + echo $cmd + $cmd + if [ $? -ne 0 ]; then + l2chaindata_ancient_status="FAILED (failed to copy)" + printStatus + exit 1 + fi l2chaindata_ancient_status="copied" else if $skip_existing; then @@ -198,31 +198,31 @@ convert "arbitrumdata" res=$? arbitrumdata_status=$convert_result if [ $res -ne 0 ]; then - printStatus - exit 1 + printStatus + exit 1 fi if [ -e $src/wasm ]; then - convert "wasm" - res=$? - wasm_status=$convert_result - if [ $res -ne 0 ]; then - printStatus - exit 1 - fi + convert "wasm" + res=$? + wasm_status=$convert_result + if [ $res -ne 0 ]; then + printStatus + exit 1 + fi else echo "== Note: Source directory does not contain wasm database." wasm_status="not found in source directory" fi if [ -e $src/classic-msg ]; then - convert "classic-msg" - res=$? - classicmsg_status=$convert_result - if [ $res -ne 0 ]; then - printStatus - exit 1 - fi + convert "classic-msg" + res=$? + classicmsg_status=$convert_result + if [ $res -ne 0 ]; then + printStatus + exit 1 + fi else echo "== Note: Source directory does not contain classic-msg database." classicmsg_status="not found in source directory" From bc3d784513a514bc267bfdfbc61b86993d25d49e Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Fri, 28 Jun 2024 17:19:03 +0200 Subject: [PATCH 34/58] pass default config to DBConfigAddOptions --- cmd/conf/database.go | 52 +++++++++++++++--------------- cmd/dbconv/dbconv/config.go | 37 +++++++++++++-------- cmd/dbconv/dbconv/dbconv_test.go | 6 ++-- system_tests/db_conversion_test.go | 10 ++---- 4 files changed, 54 insertions(+), 51 deletions(-) diff --git a/cmd/conf/database.go b/cmd/conf/database.go index 6fde00579f..c35dc22b5c 100644 --- a/cmd/conf/database.go +++ b/cmd/conf/database.go @@ -43,7 +43,7 @@ func PersistentConfigAddOptions(prefix string, f *flag.FlagSet) { f.Int(prefix+".handles", PersistentConfigDefault.Handles, "number of file descriptor handles to use for the database") f.String(prefix+".ancient", PersistentConfigDefault.Ancient, "directory of ancient where the chain freezer can be opened") f.String(prefix+".db-engine", PersistentConfigDefault.DBEngine, "backing database implementation to use ('leveldb' or 'pebble')") - PebbleConfigAddOptions(prefix+".pebble", f) + PebbleConfigAddOptions(prefix+".pebble", f, &PersistentConfigDefault.Pebble) } func (c *PersistentConfig) ResolveDirectoryNames() error { @@ -119,9 +119,9 @@ var PebbleConfigDefault = PebbleConfig{ Experimental: PebbleExperimentalConfigDefault, } -func PebbleConfigAddOptions(prefix string, f *flag.FlagSet) { - f.Int(prefix+".max-concurrent-compactions", PebbleConfigDefault.MaxConcurrentCompactions, "maximum number of concurrent compactions") - PebbleExperimentalConfigAddOptions(prefix+".experimental", f) +func PebbleConfigAddOptions(prefix string, f *flag.FlagSet, defaultConfig *PebbleConfig) { + f.Int(prefix+".max-concurrent-compactions", defaultConfig.MaxConcurrentCompactions, "maximum number of concurrent compactions") + PebbleExperimentalConfigAddOptions(prefix+".experimental", f, &defaultConfig.Experimental) } func (c *PebbleConfig) Validate() error { @@ -188,29 +188,29 @@ var PebbleExperimentalConfigDefault = PebbleExperimentalConfig{ ForceWriterParallelism: false, } -func PebbleExperimentalConfigAddOptions(prefix string, f *flag.FlagSet) { - f.Int(prefix+".bytes-per-sync", PebbleExperimentalConfigDefault.BytesPerSync, "number of bytes to write to a SSTable before calling Sync on it in the background") - f.Int(prefix+".l0-compaction-file-threshold", PebbleExperimentalConfigDefault.L0CompactionFileThreshold, "count of L0 files necessary to trigger an L0 compaction") - f.Int(prefix+".l0-compaction-threshold", PebbleExperimentalConfigDefault.L0CompactionThreshold, "amount of L0 read-amplification necessary to trigger an L0 compaction") - f.Int(prefix+".l0-stop-writes-threshold", PebbleExperimentalConfigDefault.L0StopWritesThreshold, "hard limit on L0 read-amplification, computed as the number of L0 sublevels. Writes are stopped when this threshold is reached") - f.Int64(prefix+".l-base-max-bytes", PebbleExperimentalConfigDefault.LBaseMaxBytes, "The maximum number of bytes for LBase. The base level is the level which L0 is compacted into. The base level is determined dynamically based on the existing data in the LSM. The maximum number of bytes for other levels is computed dynamically based on the base level's maximum size. When the maximum number of bytes for a level is exceeded, compaction is requested.") - f.Int(prefix+".mem-table-stop-writes-threshold", PebbleExperimentalConfigDefault.MemTableStopWritesThreshold, "hard limit on the number of queued of MemTables") - f.Bool(prefix+".disable-automatic-compactions", PebbleExperimentalConfigDefault.DisableAutomaticCompactions, "disables automatic compactions") - f.Int(prefix+".wal-bytes-per-sync", PebbleExperimentalConfigDefault.WALBytesPerSync, "number of bytes to write to a write-ahead log (WAL) before calling Sync on it in the background") - f.String(prefix+".wal-dir", PebbleExperimentalConfigDefault.WALDir, "absolute path of directory to store write-ahead logs (WALs) in. If empty, WALs will be stored in the same directory as sstables") - f.Int(prefix+".wal-min-sync-interval", PebbleExperimentalConfigDefault.WALMinSyncInterval, "minimum duration in microseconds between syncs of the WAL. If WAL syncs are requested faster than this interval, they will be artificially delayed.") - f.Int(prefix+".target-byte-deletion-rate", PebbleExperimentalConfigDefault.TargetByteDeletionRate, "rate (in bytes per second) at which sstable file deletions are limited to (under normal circumstances).") - f.Int(prefix+".block-size", PebbleExperimentalConfigDefault.BlockSize, "target uncompressed size in bytes of each table block") - f.Int(prefix+".index-block-size", PebbleExperimentalConfigDefault.IndexBlockSize, fmt.Sprintf("target uncompressed size in bytes of each index block. When the index block size is larger than this target, two-level indexes are automatically enabled. Setting this option to a large value (such as %d) disables the automatic creation of two-level indexes.", math.MaxInt32)) - f.Int64(prefix+".target-file-size", PebbleExperimentalConfigDefault.TargetFileSize, "target file size for the level 0") - f.Bool(prefix+".target-file-size-equal-levels", PebbleExperimentalConfigDefault.TargetFileSizeEqualLevels, "if true same target-file-size will be uses for all levels, otherwise target size for layer n = 2 * target size for layer n - 1") +func PebbleExperimentalConfigAddOptions(prefix string, f *flag.FlagSet, defaultConfig *PebbleExperimentalConfig) { + f.Int(prefix+".bytes-per-sync", defaultConfig.BytesPerSync, "number of bytes to write to a SSTable before calling Sync on it in the background") + f.Int(prefix+".l0-compaction-file-threshold", defaultConfig.L0CompactionFileThreshold, "count of L0 files necessary to trigger an L0 compaction") + f.Int(prefix+".l0-compaction-threshold", defaultConfig.L0CompactionThreshold, "amount of L0 read-amplification necessary to trigger an L0 compaction") + f.Int(prefix+".l0-stop-writes-threshold", defaultConfig.L0StopWritesThreshold, "hard limit on L0 read-amplification, computed as the number of L0 sublevels. Writes are stopped when this threshold is reached") + f.Int64(prefix+".l-base-max-bytes", defaultConfig.LBaseMaxBytes, "The maximum number of bytes for LBase. The base level is the level which L0 is compacted into. The base level is determined dynamically based on the existing data in the LSM. The maximum number of bytes for other levels is computed dynamically based on the base level's maximum size. When the maximum number of bytes for a level is exceeded, compaction is requested.") + f.Int(prefix+".mem-table-stop-writes-threshold", defaultConfig.MemTableStopWritesThreshold, "hard limit on the number of queued of MemTables") + f.Bool(prefix+".disable-automatic-compactions", defaultConfig.DisableAutomaticCompactions, "disables automatic compactions") + f.Int(prefix+".wal-bytes-per-sync", defaultConfig.WALBytesPerSync, "number of bytes to write to a write-ahead log (WAL) before calling Sync on it in the background") + f.String(prefix+".wal-dir", defaultConfig.WALDir, "absolute path of directory to store write-ahead logs (WALs) in. If empty, WALs will be stored in the same directory as sstables") + f.Int(prefix+".wal-min-sync-interval", defaultConfig.WALMinSyncInterval, "minimum duration in microseconds between syncs of the WAL. If WAL syncs are requested faster than this interval, they will be artificially delayed.") + f.Int(prefix+".target-byte-deletion-rate", defaultConfig.TargetByteDeletionRate, "rate (in bytes per second) at which sstable file deletions are limited to (under normal circumstances).") + f.Int(prefix+".block-size", defaultConfig.BlockSize, "target uncompressed size in bytes of each table block") + f.Int(prefix+".index-block-size", defaultConfig.IndexBlockSize, fmt.Sprintf("target uncompressed size in bytes of each index block. When the index block size is larger than this target, two-level indexes are automatically enabled. Setting this option to a large value (such as %d) disables the automatic creation of two-level indexes.", math.MaxInt32)) + f.Int64(prefix+".target-file-size", defaultConfig.TargetFileSize, "target file size for the level 0") + f.Bool(prefix+".target-file-size-equal-levels", defaultConfig.TargetFileSizeEqualLevels, "if true same target-file-size will be uses for all levels, otherwise target size for layer n = 2 * target size for layer n - 1") - f.Int(prefix+".l0-compaction-concurrency", PebbleExperimentalConfigDefault.L0CompactionConcurrency, "threshold of L0 read-amplification at which compaction concurrency is enabled (if compaction-debt-concurrency was not already exceeded). Every multiple of this value enables another concurrent compaction up to max-concurrent-compactions.") - f.Uint64(prefix+".compaction-debt-concurrency", PebbleExperimentalConfigDefault.CompactionDebtConcurrency, "controls the threshold of compaction debt at which additional compaction concurrency slots are added. For every multiple of this value in compaction debt bytes, an additional concurrent compaction is added. This works \"on top\" of l0-compaction-concurrency, so the higher of the count of compaction concurrency slots as determined by the two options is chosen.") - f.Int64(prefix+".read-compaction-rate", PebbleExperimentalConfigDefault.ReadCompactionRate, "controls the frequency of read triggered compactions by adjusting `AllowedSeeks` in manifest.FileMetadata: AllowedSeeks = FileSize / ReadCompactionRate") - f.Int64(prefix+".read-sampling-multiplier", PebbleExperimentalConfigDefault.ReadSamplingMultiplier, "a multiplier for the readSamplingPeriod in iterator.maybeSampleRead() to control the frequency of read sampling to trigger a read triggered compaction. A value of -1 prevents sampling and disables read triggered compactions. Geth default is -1. The pebble default is 1 << 4. which gets multiplied with a constant of 1 << 16 to yield 1 << 20 (1MB).") - f.Int(prefix+".max-writer-concurrency", PebbleExperimentalConfigDefault.MaxWriterConcurrency, "maximum number of compression workers the compression queue is allowed to use. If max-writer-concurrency > 0, then the Writer will use parallelism, to compress and write blocks to disk. Otherwise, the writer will compress and write blocks to disk synchronously.") - f.Bool(prefix+".force-writer-parallelism", PebbleExperimentalConfigDefault.ForceWriterParallelism, "force parallelism in the sstable Writer for the metamorphic tests. Even with the MaxWriterConcurrency option set, pebble only enables parallelism in the sstable Writer if there is enough CPU available, and this option bypasses that.") + f.Int(prefix+".l0-compaction-concurrency", defaultConfig.L0CompactionConcurrency, "threshold of L0 read-amplification at which compaction concurrency is enabled (if compaction-debt-concurrency was not already exceeded). Every multiple of this value enables another concurrent compaction up to max-concurrent-compactions.") + f.Uint64(prefix+".compaction-debt-concurrency", defaultConfig.CompactionDebtConcurrency, "controls the threshold of compaction debt at which additional compaction concurrency slots are added. For every multiple of this value in compaction debt bytes, an additional concurrent compaction is added. This works \"on top\" of l0-compaction-concurrency, so the higher of the count of compaction concurrency slots as determined by the two options is chosen.") + f.Int64(prefix+".read-compaction-rate", defaultConfig.ReadCompactionRate, "controls the frequency of read triggered compactions by adjusting `AllowedSeeks` in manifest.FileMetadata: AllowedSeeks = FileSize / ReadCompactionRate") + f.Int64(prefix+".read-sampling-multiplier", defaultConfig.ReadSamplingMultiplier, "a multiplier for the readSamplingPeriod in iterator.maybeSampleRead() to control the frequency of read sampling to trigger a read triggered compaction. A value of -1 prevents sampling and disables read triggered compactions. Geth default is -1. The pebble default is 1 << 4. which gets multiplied with a constant of 1 << 16 to yield 1 << 20 (1MB).") + f.Int(prefix+".max-writer-concurrency", defaultConfig.MaxWriterConcurrency, "maximum number of compression workers the compression queue is allowed to use. If max-writer-concurrency > 0, then the Writer will use parallelism, to compress and write blocks to disk. Otherwise, the writer will compress and write blocks to disk synchronously.") + f.Bool(prefix+".force-writer-parallelism", defaultConfig.ForceWriterParallelism, "force parallelism in the sstable Writer for the metamorphic tests. Even with the MaxWriterConcurrency option set, pebble only enables parallelism in the sstable Writer if there is enough CPU available, and this option bypasses that.") } func (c *PebbleExperimentalConfig) Validate() error { diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index 3d938fdfd1..7cfb0cbd31 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -19,19 +19,28 @@ type DBConfig struct { Pebble conf.PebbleConfig `koanf:"pebble"` } -var DBConfigDefault = DBConfig{ - Handles: conf.PersistentConfigDefault.Handles, - Cache: gethexec.DefaultCachingConfig.DatabaseCache, - Pebble: conf.PebbleConfigDefault, +var DBConfigDefaultDst = DBConfig{ + DBEngine: "pebble", + Handles: conf.PersistentConfigDefault.Handles, + Cache: gethexec.DefaultCachingConfig.DatabaseCache, + Namespace: "dstdb/", + Pebble: conf.PebbleConfigDefault, } -func DBConfigAddOptions(prefix string, f *flag.FlagSet, defaultNamespace string) { - f.String(prefix+".data", DBConfigDefault.Data, "directory of stored chain state") - f.String(prefix+".db-engine", DBConfigDefault.DBEngine, "backing database implementation to use ('leveldb' or 'pebble')") - f.Int(prefix+".handles", DBConfigDefault.Handles, "number of files to be open simultaneously") - f.Int(prefix+".cache", DBConfigDefault.Cache, "the capacity(in megabytes) of the data caching") - f.String(prefix+".namespace", defaultNamespace, "metrics namespace") - conf.PebbleConfigAddOptions(prefix+".pebble", f) +var DBConfigDefaultSrc = DBConfig{ + DBEngine: "leveldb", + Handles: conf.PersistentConfigDefault.Handles, + Cache: gethexec.DefaultCachingConfig.DatabaseCache, + Namespace: "srcdb/", +} + +func DBConfigAddOptions(prefix string, f *flag.FlagSet, defaultConfig *DBConfig) { + f.String(prefix+".data", defaultConfig.Data, "directory of stored chain state") + f.String(prefix+".db-engine", defaultConfig.DBEngine, "backing database implementation to use ('leveldb' or 'pebble')") + f.Int(prefix+".handles", defaultConfig.Handles, "number of files to be open simultaneously") + f.Int(prefix+".cache", defaultConfig.Cache, "the capacity(in megabytes) of the data caching") + f.String(prefix+".namespace", defaultConfig.Namespace, "metrics namespace") + conf.PebbleConfigAddOptions(prefix+".pebble", f, &defaultConfig.Pebble) } type DBConvConfig struct { @@ -48,6 +57,8 @@ type DBConvConfig struct { } var DefaultDBConvConfig = DBConvConfig{ + Src: DBConfigDefaultSrc, + Dst: DBConfigDefaultDst, IdealBatchSize: 100 * 1024 * 1024, // 100 MB Convert: false, Compact: false, @@ -59,8 +70,8 @@ var DefaultDBConvConfig = DBConvConfig{ } func DBConvConfigAddOptions(f *flag.FlagSet) { - DBConfigAddOptions("src", f, "srcdb/") - DBConfigAddOptions("dst", f, "destdb/") + DBConfigAddOptions("src", f, &DefaultDBConvConfig.Src) + DBConfigAddOptions("dst", f, &DefaultDBConvConfig.Dst) f.Int("ideal-batch-size", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size") f.Bool("convert", DefaultDBConvConfig.Convert, "enables conversion step") f.Bool("compact", DefaultDBConvConfig.Compact, "enables compaction step") diff --git a/cmd/dbconv/dbconv/dbconv_test.go b/cmd/dbconv/dbconv/dbconv_test.go index 16d42269f7..f31dd68618 100644 --- a/cmd/dbconv/dbconv/dbconv_test.go +++ b/cmd/dbconv/dbconv/dbconv_test.go @@ -10,13 +10,11 @@ import ( func TestConversion(t *testing.T) { _ = testhelpers.InitTestLog(t, log.LvlTrace) - oldDBConfig := DBConfigDefault + oldDBConfig := DBConfigDefaultSrc oldDBConfig.Data = t.TempDir() - oldDBConfig.DBEngine = "leveldb" - newDBConfig := DBConfigDefault + newDBConfig := DBConfigDefaultDst newDBConfig.Data = t.TempDir() - newDBConfig.DBEngine = "pebble" func() { oldDb, err := openDB(&oldDBConfig, "", false) diff --git a/system_tests/db_conversion_test.go b/system_tests/db_conversion_test.go index 5f1619e86c..b811f40347 100644 --- a/system_tests/db_conversion_test.go +++ b/system_tests/db_conversion_test.go @@ -51,15 +51,9 @@ func TestDatabaseConversion(t *testing.T) { err := os.Rename(filepath.Join(instanceDir, dbname), filepath.Join(instanceDir, fmt.Sprintf("%s_old", dbname))) Require(t, err) t.Log("converting:", dbname) - oldDBConfig := dbconv.DBConfigDefault - oldDBConfig.Data = path.Join(instanceDir, fmt.Sprintf("%s_old", dbname)) - oldDBConfig.DBEngine = "leveldb" - newDBConfig := dbconv.DBConfigDefault - newDBConfig.Data = path.Join(instanceDir, dbname) - newDBConfig.DBEngine = "pebble" convConfig := dbconv.DefaultDBConvConfig - convConfig.Src = oldDBConfig - convConfig.Dst = newDBConfig + convConfig.Src.Data = path.Join(instanceDir, fmt.Sprintf("%s_old", dbname)) + convConfig.Dst.Data = path.Join(instanceDir, dbname) conv := dbconv.NewDBConverter(&convConfig) err = conv.Convert(ctx) Require(t, err) From 37a826e75026b0cbf3d3e222bec2730f8019f612 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Fri, 28 Jun 2024 17:27:27 +0200 Subject: [PATCH 35/58] dbconv/stats: rename AddBytes/AddEntries to LogBytes/LogEntries --- cmd/dbconv/dbconv/dbconv.go | 14 +++++++------- cmd/dbconv/dbconv/stats.go | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index a98c853c50..d537aa513b 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -64,8 +64,8 @@ func (c *DBConverter) Convert(ctx context.Context) error { if err = batch.Write(); err != nil { return err } - c.stats.AddEntries(int64(entriesInBatch)) - c.stats.AddBytes(int64(batchSize)) + c.stats.LogEntries(int64(entriesInBatch)) + c.stats.LogBytes(int64(batchSize)) batch.Reset() entriesInBatch = 0 } @@ -75,8 +75,8 @@ func (c *DBConverter) Convert(ctx context.Context) error { if err = batch.Write(); err != nil { return err } - c.stats.AddEntries(int64(entriesInBatch)) - c.stats.AddBytes(int64(batchSize)) + c.stats.LogEntries(int64(entriesInBatch)) + c.stats.LogBytes(int64(batchSize)) } return err } @@ -128,7 +128,7 @@ func (c *DBConverter) Verify(ctx context.Context) error { if !has { return fmt.Errorf("Missing key in destination db, key: %v", it.Key()) } - c.stats.AddBytes(int64(len(it.Key()))) + c.stats.LogBytes(int64(len(it.Key()))) case "full": dstValue, err := dst.Get(it.Key()) if err != nil { @@ -137,11 +137,11 @@ func (c *DBConverter) Verify(ctx context.Context) error { if !bytes.Equal(dstValue, it.Value()) { return fmt.Errorf("Value mismatch for key: %v, src value: %v, dst value: %s", it.Key(), it.Value(), dstValue) } - c.stats.AddBytes(int64(len(it.Key()) + len(dstValue))) + c.stats.LogBytes(int64(len(it.Key()) + len(dstValue))) default: return fmt.Errorf("Invalid verify config value: %v", c.config.Verify) } - c.stats.AddEntries(1) + c.stats.LogEntries(1) } return ctx.Err() } diff --git a/cmd/dbconv/dbconv/stats.go b/cmd/dbconv/dbconv/stats.go index 91fc2529d6..729a408f38 100644 --- a/cmd/dbconv/dbconv/stats.go +++ b/cmd/dbconv/dbconv/stats.go @@ -27,7 +27,7 @@ func (s *Stats) Reset() { s.prevBytesTimestamp = now } -func (s *Stats) AddEntries(entries int64) { +func (s *Stats) LogEntries(entries int64) { s.entries.Add(entries) } @@ -35,7 +35,7 @@ func (s *Stats) Entries() int64 { return s.entries.Load() } -func (s *Stats) AddBytes(bytes int64) { +func (s *Stats) LogBytes(bytes int64) { s.bytes.Add(bytes) } From 0ca882a29d1c79a41b0a3023cf346e0250b96998 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Mon, 1 Jul 2024 18:02:01 +0200 Subject: [PATCH 36/58] remove dst dirs when conversion fails --- scripts/convert-databases.bash | 73 ++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 12 deletions(-) diff --git a/scripts/convert-databases.bash b/scripts/convert-databases.bash index c1803b5d98..1adb70a460 100755 --- a/scripts/convert-databases.bash +++ b/scripts/convert-databases.bash @@ -8,12 +8,13 @@ src=$DEFAULT_SRC dst= force=false skip_existing=false +clean="all" -l2chaindata_status="n/a" -l2chaindata_ancient_status="n/a" -arbitrumdata_status="n/a" -wasm_status="n/a" -classicmsg_status="n/a" +l2chaindata_status="not started" +l2chaindata_ancient_status="not started" +arbitrumdata_status="not started" +wasm_status="not started" +classicmsg_status="not started" checkMissingValue () { if [[ $1 -eq 0 || $2 == -* ]]; then @@ -40,6 +41,42 @@ echo Usage: $0 \[OPTIONS..\] echo "--dst destination directory" echo "--force remove destination directory if it exists" echo "--skip-existing skip convertion of databases which directories already exist in the destination directory" + echo "--clean sets what should be removed in case of error, possible values:" + echo " \"all\" - remove whole destination directory (default)" + echo " \"failed\" - remove database which conversion failed" + echo " \"none\" - remove nothing, leave unfinished and potentially corrupted databases" +} + +removeDir() { + cmd="rm -r $1" + echo $cmd + $cmd + return $? +} + +removeDir + +cleanup() { + case $clean in + all) + echo "== Removing destination directory" + removeDir "$dst" + ;; + failed) + echo "== Note: removing only failed destination directory" + dstdir=$(echo $dst/$1 | tr -s /) + removeDir $dstdir + ;; + none) + echo "== Warning: not removing destination directories, the destination databases might be incomplete and/or corrupted!" + ;; + *) + # shouldn't happen + echo "Script error, invalid --clean flag value: $clean" + exit 1 + ;; + + esac } while [[ $# -gt 0 ]]; do @@ -70,6 +107,12 @@ while [[ $# -gt 0 ]]; do skip_existing=true shift ;; + --clean) + shift + checkMissingValue $# "$1" "--clean" + clean=$1 + shift + ;; --help) printUsage exit 0 @@ -86,6 +129,12 @@ if $force && $skip_existing; then exit 1 fi +if [ $clean != "all" ] && [ $clean != "failed" ] && [ $clean != "none" ] ; then + echo Error: Invalid --clean value: $clean + printUsage + exit 1 +fi + if ! [ -e "$dbconv" ]; then echo Error: Invalid dbconv binary path: "$dbconv" does not exist exit 1 @@ -121,13 +170,11 @@ fi if [ -e "$dst" ] && ! $skip_existing; then if $force; then - echo == Warning! Destination already exists, --force is set, this will remove all files under path: "$dst" - read -p "are you sure? [y/n]" -n 1 response - echo - if [[ $response == "y" ]] || [[ $response == "Y" ]]; then - (set -x; rm -r "$dst" || exit 1) - else - exit 0 + echo == Warning! Destination already exists, --force is set, removing all files under path: "$dst" + removeDir "$dst" + if [ $? -ne 0 ]; then + echo Error: failed to remove "$dst" + exit 1 fi else echo Error: invalid destination path: "$dst" already exists @@ -145,6 +192,7 @@ convert () { echo $cmd $cmd if [ $? -ne 0 ]; then + cleanup $1 convert_result="FAILED" return 1 fi @@ -179,6 +227,7 @@ if ! [ -e $dst/l2chaindata/ancient ]; then $cmd if [ $? -ne 0 ]; then l2chaindata_ancient_status="FAILED (failed to copy)" + cleanup "l2chaindata" printStatus exit 1 fi From 1d69881630f86d37ef1a3da60b535c4677cffeb5 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Mon, 1 Jul 2024 18:03:55 +0200 Subject: [PATCH 37/58] clean up conver-databases script --- scripts/convert-databases.bash | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/convert-databases.bash b/scripts/convert-databases.bash index 1adb70a460..7e0e7aba14 100755 --- a/scripts/convert-databases.bash +++ b/scripts/convert-databases.bash @@ -54,8 +54,6 @@ removeDir() { return $? } -removeDir - cleanup() { case $clean in all) From 752784812d1991df02e3d25434d8bda3dd8aa666 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 3 Jul 2024 15:55:14 +0200 Subject: [PATCH 38/58] add unfinished convertion canary key --- cmd/dbconv/dbconv/dbconv.go | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index d537aa513b..e8f6c61802 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -11,6 +11,8 @@ import ( "github.com/ethereum/go-ethereum/log" ) +var UnfinishedConversionCanaryKey = []byte("unfinished-conversion-canary-key") + type DBConverter struct { config *DBConvConfig stats Stats @@ -23,7 +25,7 @@ func NewDBConverter(config *DBConvConfig) *DBConverter { } func openDB(config *DBConfig, name string, readonly bool) (ethdb.Database, error) { - return rawdb.Open(rawdb.OpenOptions{ + db, err := rawdb.Open(rawdb.OpenOptions{ Type: config.DBEngine, Directory: config.Data, // we don't open freezer, it doesn't need to be converted as it has format independent of db-engine @@ -35,6 +37,20 @@ func openDB(config *DBConfig, name string, readonly bool) (ethdb.Database, error ReadOnly: readonly, PebbleExtraOptions: config.Pebble.ExtraOptions(name), }) + if err != nil { + return nil, err + } + unfinished, err := db.Has(UnfinishedConversionCanaryKey) + if err != nil { + return nil, fmt.Errorf("Failed to check canary key existence: %w", err) + } + if unfinished { + if err := db.Close(); err != nil { + return nil, fmt.Errorf("Unfinished conversion canary key detected and failed to close: %w", err) + } + return nil, fmt.Errorf("Unfinished conversion canary key detected") + } + return db, nil } func (c *DBConverter) Convert(ctx context.Context) error { @@ -51,6 +67,9 @@ func (c *DBConverter) Convert(ctx context.Context) error { defer dst.Close() c.stats.Reset() log.Info("Converting database", "src", c.config.Src.Data, "dst", c.config.Dst.Data, "db-engine", c.config.Dst.DBEngine) + if err = dst.Put(UnfinishedConversionCanaryKey, []byte{1}); err != nil { + return err + } it := src.NewIterator(nil, nil) defer it.Release() batch := dst.NewBatch() @@ -78,6 +97,11 @@ func (c *DBConverter) Convert(ctx context.Context) error { c.stats.LogEntries(int64(entriesInBatch)) c.stats.LogBytes(int64(batchSize)) } + if err == nil { + if err = dst.Delete(UnfinishedConversionCanaryKey); err != nil { + return err + } + } return err } From 9745003e289cfca1e22600f4489155e598b0c349 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Wed, 10 Jul 2024 10:51:52 -0500 Subject: [PATCH 39/58] Delete messages from coordinator after they become final --- arbnode/seq_coordinator.go | 80 ++++++++++++++++++++++++++++- arbnode/sync_monitor.go | 8 +++ util/redisutil/redis_coordinator.go | 13 ++--- 3 files changed, 93 insertions(+), 8 deletions(-) diff --git a/arbnode/seq_coordinator.go b/arbnode/seq_coordinator.go index cdf1011b11..64b1ef9b81 100644 --- a/arbnode/seq_coordinator.go +++ b/arbnode/seq_coordinator.go @@ -39,6 +39,7 @@ type SeqCoordinator struct { redisutil.RedisCoordinator + sync *SyncMonitor streamer *TransactionStreamer sequencer execution.ExecutionSequencer delayedSequencer *DelayedSequencer @@ -104,7 +105,7 @@ var DefaultSeqCoordinatorConfig = SeqCoordinatorConfig{ RedisUrl: "", LockoutDuration: time.Minute, LockoutSpare: 30 * time.Second, - SeqNumDuration: 24 * time.Hour, + SeqNumDuration: 10 * 24 * time.Hour, UpdateInterval: 250 * time.Millisecond, HandoffTimeout: 30 * time.Second, SafeShutdownDelay: 5 * time.Second, @@ -149,6 +150,7 @@ func NewSeqCoordinator( } coordinator := &SeqCoordinator{ RedisCoordinator: *redisCoordinator, + sync: sync, streamer: streamer, sequencer: sequencer, config: config, @@ -338,6 +340,14 @@ func (c *SeqCoordinator) acquireLockoutAndWriteMessage(ctx context.Context, msgC return nil } +func (c *SeqCoordinator) getRemoteFinalizedMsgCount(ctx context.Context) (arbutil.MessageIndex, error) { + resStr, err := c.Client.Get(ctx, redisutil.FINALIZED_MSG_COUNT_KEY).Result() + if err != nil { + return 0, err + } + return c.signedBytesToMsgCount(ctx, []byte(resStr)) +} + func (c *SeqCoordinator) getRemoteMsgCountImpl(ctx context.Context, r redis.Cmdable) (arbutil.MessageIndex, error) { resStr, err := r.Get(ctx, redisutil.MSG_COUNT_KEY).Result() if errors.Is(err, redis.Nil) { @@ -473,6 +483,10 @@ func (c *SeqCoordinator) updateWithLockout(ctx context.Context, nextChosen strin return c.noRedisError() } // Was, and still is, the active sequencer + // Before proceeding, first try deleting finalized messages from redis and setting the finalizedMsgCount key + if err := c.deleteFinalizedMsgsFromRedis(ctx); err != nil { + log.Warn("Coordinator failed to delete finalized messages from redis", "err", err) + } // We leave a margin of error of either a five times the update interval or a fifth of the lockout duration, whichever is greater. marginOfError := arbmath.MaxInt(c.config.LockoutDuration/5, c.config.UpdateInterval*5) if time.Now().Add(marginOfError).Before(atomicTimeRead(&c.lockoutUntil)) { @@ -492,6 +506,64 @@ func (c *SeqCoordinator) updateWithLockout(ctx context.Context, nextChosen strin return c.noRedisError() } +func (c *SeqCoordinator) deleteFinalizedMsgsFromRedis(ctx context.Context) error { + finalized, err := c.sync.GetFinalizedMsgCount(ctx) + if err != nil || finalized == 0 { + return fmt.Errorf("finalizedMessageCount is zero or error getting finalizedMessageCount from syncMonitor: %w", err) + } + updateFinalizedMsgCount := func() error { + finalizedBytes, err := c.msgCountToSignedBytes(finalized) + if err != nil { + return err + } + if err = c.Client.Set(ctx, redisutil.FINALIZED_MSG_COUNT_KEY, finalizedBytes, c.config.SeqNumDuration).Err(); err != nil { + return fmt.Errorf("couldn't set %s key to current finalizedMsgCount in redis: %w", redisutil.FINALIZED_MSG_COUNT_KEY, err) + } + return nil + } + prevFinalized, err := c.getRemoteFinalizedMsgCount(ctx) + if err != nil { + if errors.Is(err, redis.Nil) { + var keys []string + for msg := finalized; ; msg-- { + exists, err := c.Client.Exists(ctx, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)).Result() + if exists == 0 || err != nil { + break + } + keys = append(keys, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)) + } + // If there is an error deleting finalized messages during init, we retry later either from this sequencer or from another + if len(keys) > 0 { + log.Info("Initializing finalizedMsgCount and deleting finalized messages from redis", "finalizedMsgCount", finalized) + if err := c.Client.Del(ctx, keys...).Err(); err != nil { + return fmt.Errorf("error deleting finalized message and their signatures from redis during init of finalizedMsgCount: %w", err) + } + } + return updateFinalizedMsgCount() + } + return fmt.Errorf("error getting finalizedMsgCount value from redis: %w", err) + } + remoteMsgCount, err := c.GetRemoteMsgCount() + if err != nil { + return fmt.Errorf("cannot get remote message count: %w", err) + } + msgToDelete := finalized + if msgToDelete > remoteMsgCount { + msgToDelete = remoteMsgCount + } + if prevFinalized < msgToDelete { + var keys []string + for msg := prevFinalized + 1; msg <= msgToDelete; msg++ { + keys = append(keys, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)) + } + if err := c.Client.Del(ctx, keys...).Err(); err != nil { + return fmt.Errorf("error deleting finalized message and their signatures from redis: %w", err) + } + return updateFinalizedMsgCount() + } + return nil +} + func (c *SeqCoordinator) update(ctx context.Context) time.Duration { chosenSeq, err := c.RecommendSequencerWantingLockout(ctx) if err != nil { @@ -522,6 +594,10 @@ func (c *SeqCoordinator) update(ctx context.Context) time.Duration { log.Error("cannot read message count", "err", err) return c.config.UpdateInterval } + remoteFinalizedMsgCount, err := c.getRemoteFinalizedMsgCount(ctx) + if err != nil { + log.Warn("Cannot get remote finalized message count, might encounter failed to read message warnings later", "err", err) + } remoteMsgCount, err := c.GetRemoteMsgCount() if err != nil { log.Warn("cannot get remote message count", "err", err) @@ -534,7 +610,7 @@ func (c *SeqCoordinator) update(ctx context.Context) time.Duration { var messages []arbostypes.MessageWithMetadata msgToRead := localMsgCount var msgReadErr error - for msgToRead < readUntil { + for msgToRead < readUntil && localMsgCount >= remoteFinalizedMsgCount { var resString string resString, msgReadErr = c.Client.Get(ctx, redisutil.MessageKeyFor(msgToRead)).Result() if msgReadErr != nil { diff --git a/arbnode/sync_monitor.go b/arbnode/sync_monitor.go index d3b9a7e1c6..27da6b7331 100644 --- a/arbnode/sync_monitor.go +++ b/arbnode/sync_monitor.go @@ -2,6 +2,7 @@ package arbnode import ( "context" + "errors" "sync" "time" @@ -72,6 +73,13 @@ func (s *SyncMonitor) SyncTargetMessageCount() arbutil.MessageIndex { return s.syncTarget } +func (s *SyncMonitor) GetFinalizedMsgCount(ctx context.Context) (arbutil.MessageIndex, error) { + if s.inboxReader != nil && s.inboxReader.l1Reader != nil { + return s.inboxReader.GetFinalizedMsgCount(ctx) + } + return 0, errors.New("sync monitor's GetFinalizedMsgCount method is unsupported, try starting node with --parent-chain.connection.url") +} + func (s *SyncMonitor) maxMessageCount() (arbutil.MessageIndex, error) { msgCount, err := s.txStreamer.GetMessageCount() if err != nil { diff --git a/util/redisutil/redis_coordinator.go b/util/redisutil/redis_coordinator.go index 59e3b0e0f9..2c12ffec50 100644 --- a/util/redisutil/redis_coordinator.go +++ b/util/redisutil/redis_coordinator.go @@ -13,12 +13,13 @@ import ( "github.com/offchainlabs/nitro/arbutil" ) -const CHOSENSEQ_KEY string = "coordinator.chosen" // Never overwritten. Expires or released only -const MSG_COUNT_KEY string = "coordinator.msgCount" // Only written by sequencer holding CHOSEN key -const PRIORITIES_KEY string = "coordinator.priorities" // Read only -const WANTS_LOCKOUT_KEY_PREFIX string = "coordinator.liveliness." // Per server. Only written by self -const MESSAGE_KEY_PREFIX string = "coordinator.msg." // Per Message. Only written by sequencer holding CHOSEN -const SIGNATURE_KEY_PREFIX string = "coordinator.msg.sig." // Per Message. Only written by sequencer holding CHOSEN +const CHOSENSEQ_KEY string = "coordinator.chosen" // Never overwritten. Expires or released only +const MSG_COUNT_KEY string = "coordinator.msgCount" // Only written by sequencer holding CHOSEN key +const FINALIZED_MSG_COUNT_KEY string = "coordinator.finalizedMsgCount" // Only written by sequencer holding CHOSEN key +const PRIORITIES_KEY string = "coordinator.priorities" // Read only +const WANTS_LOCKOUT_KEY_PREFIX string = "coordinator.liveliness." // Per server. Only written by self +const MESSAGE_KEY_PREFIX string = "coordinator.msg." // Per Message. Only written by sequencer holding CHOSEN +const SIGNATURE_KEY_PREFIX string = "coordinator.msg.sig." // Per Message. Only written by sequencer holding CHOSEN const WANTS_LOCKOUT_VAL string = "OK" const INVALID_VAL string = "INVALID" const INVALID_URL string = "" From ab8e6a8d0d4c80ffdca71803fd6e4d8efe2d2629 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Mon, 15 Jul 2024 17:47:33 -0500 Subject: [PATCH 40/58] address PR comments --- arbnode/seq_coordinator.go | 54 +++++++++++++++++--------------------- arbnode/sync_monitor.go | 3 +-- cmd/nitro/nitro.go | 4 +++ 3 files changed, 29 insertions(+), 32 deletions(-) diff --git a/arbnode/seq_coordinator.go b/arbnode/seq_coordinator.go index 64b1ef9b81..5fd604842f 100644 --- a/arbnode/seq_coordinator.go +++ b/arbnode/seq_coordinator.go @@ -484,7 +484,12 @@ func (c *SeqCoordinator) updateWithLockout(ctx context.Context, nextChosen strin } // Was, and still is, the active sequencer // Before proceeding, first try deleting finalized messages from redis and setting the finalizedMsgCount key - if err := c.deleteFinalizedMsgsFromRedis(ctx); err != nil { + finalized, err := c.sync.GetFinalizedMsgCount(ctx) + if err != nil { + log.Warn("Error getting finalizedMessageCount from syncMonitor: %w", err) + } else if finalized != 0 { + log.Warn("SyncMonitor returned zero finalizedMessageCount") + } else if err := c.deleteFinalizedMsgsFromRedis(ctx, finalized); err != nil { log.Warn("Coordinator failed to delete finalized messages from redis", "err", err) } // We leave a margin of error of either a five times the update interval or a fifth of the lockout duration, whichever is greater. @@ -506,11 +511,7 @@ func (c *SeqCoordinator) updateWithLockout(ctx context.Context, nextChosen strin return c.noRedisError() } -func (c *SeqCoordinator) deleteFinalizedMsgsFromRedis(ctx context.Context) error { - finalized, err := c.sync.GetFinalizedMsgCount(ctx) - if err != nil || finalized == 0 { - return fmt.Errorf("finalizedMessageCount is zero or error getting finalizedMessageCount from syncMonitor: %w", err) - } +func (c *SeqCoordinator) deleteFinalizedMsgsFromRedis(ctx context.Context, finalized arbutil.MessageIndex) error { updateFinalizedMsgCount := func() error { finalizedBytes, err := c.msgCountToSignedBytes(finalized) if err != nil { @@ -522,35 +523,31 @@ func (c *SeqCoordinator) deleteFinalizedMsgsFromRedis(ctx context.Context) error return nil } prevFinalized, err := c.getRemoteFinalizedMsgCount(ctx) - if err != nil { - if errors.Is(err, redis.Nil) { - var keys []string - for msg := finalized; ; msg-- { - exists, err := c.Client.Exists(ctx, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)).Result() - if exists == 0 || err != nil { - break - } - keys = append(keys, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)) + if errors.Is(err, redis.Nil) { + var keys []string + for msg := finalized; ; msg-- { + exists, err := c.Client.Exists(ctx, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)).Result() + if exists == 0 || err != nil { + break } - // If there is an error deleting finalized messages during init, we retry later either from this sequencer or from another - if len(keys) > 0 { - log.Info("Initializing finalizedMsgCount and deleting finalized messages from redis", "finalizedMsgCount", finalized) - if err := c.Client.Del(ctx, keys...).Err(); err != nil { - return fmt.Errorf("error deleting finalized message and their signatures from redis during init of finalizedMsgCount: %w", err) - } + keys = append(keys, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)) + } + // If there is an error deleting finalized messages during init, we retry later either from this sequencer or from another + if len(keys) > 0 { + log.Info("Initializing finalizedMsgCount and deleting finalized messages from redis", "finalizedMsgCount", finalized) + if err := c.Client.Del(ctx, keys...).Err(); err != nil { + return fmt.Errorf("error deleting finalized message and their signatures from redis during init of finalizedMsgCount: %w", err) } - return updateFinalizedMsgCount() } + return updateFinalizedMsgCount() + } else if err != nil { return fmt.Errorf("error getting finalizedMsgCount value from redis: %w", err) } remoteMsgCount, err := c.GetRemoteMsgCount() if err != nil { return fmt.Errorf("cannot get remote message count: %w", err) } - msgToDelete := finalized - if msgToDelete > remoteMsgCount { - msgToDelete = remoteMsgCount - } + msgToDelete := min(finalized, remoteMsgCount) if prevFinalized < msgToDelete { var keys []string for msg := prevFinalized + 1; msg <= msgToDelete; msg++ { @@ -603,10 +600,7 @@ func (c *SeqCoordinator) update(ctx context.Context) time.Duration { log.Warn("cannot get remote message count", "err", err) return c.retryAfterRedisError() } - readUntil := remoteMsgCount - if readUntil > localMsgCount+c.config.MsgPerPoll { - readUntil = localMsgCount + c.config.MsgPerPoll - } + readUntil := min(localMsgCount+c.config.MsgPerPoll, remoteMsgCount) var messages []arbostypes.MessageWithMetadata msgToRead := localMsgCount var msgReadErr error diff --git a/arbnode/sync_monitor.go b/arbnode/sync_monitor.go index 27da6b7331..5ab1ede2d6 100644 --- a/arbnode/sync_monitor.go +++ b/arbnode/sync_monitor.go @@ -2,7 +2,6 @@ package arbnode import ( "context" - "errors" "sync" "time" @@ -77,7 +76,7 @@ func (s *SyncMonitor) GetFinalizedMsgCount(ctx context.Context) (arbutil.Message if s.inboxReader != nil && s.inboxReader.l1Reader != nil { return s.inboxReader.GetFinalizedMsgCount(ctx) } - return 0, errors.New("sync monitor's GetFinalizedMsgCount method is unsupported, try starting node with --parent-chain.connection.url") + return 0, nil } func (s *SyncMonitor) maxMessageCount() (arbutil.MessageIndex, error) { diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index 04bdeb3228..45cb6a01c7 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -232,6 +232,10 @@ func mainImpl() int { if nodeConfig.Execution.Sequencer.Enable != nodeConfig.Node.Sequencer { log.Error("consensus and execution must agree if sequencing is enabled or not", "Execution.Sequencer.Enable", nodeConfig.Execution.Sequencer.Enable, "Node.Sequencer", nodeConfig.Node.Sequencer) } + if nodeConfig.Node.SeqCoordinator.Enable && !nodeConfig.Node.ParentChainReader.Enable { + log.Error("Sequencer coordinator must be enabled with parent chain reader, try starting node with --parent-chain.connection.url") + return 1 + } var dataSigner signature.DataSignerFunc var l1TransactionOptsValidator *bind.TransactOpts From 4cff8b3f4336347845e476460fff2fe8ff86dd79 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Tue, 16 Jul 2024 10:56:03 -0500 Subject: [PATCH 41/58] add test for deleteFinalizedMsgsFromRedis --- arbnode/seq_coordinator.go | 8 +- ...atomic_test.go => seq_coordinator_test.go} | 84 +++++++++++++++++++ 2 files changed, 88 insertions(+), 4 deletions(-) rename arbnode/{seq_coordinator_atomic_test.go => seq_coordinator_test.go} (60%) diff --git a/arbnode/seq_coordinator.go b/arbnode/seq_coordinator.go index 5fd604842f..2f5e724f61 100644 --- a/arbnode/seq_coordinator.go +++ b/arbnode/seq_coordinator.go @@ -525,7 +525,7 @@ func (c *SeqCoordinator) deleteFinalizedMsgsFromRedis(ctx context.Context, final prevFinalized, err := c.getRemoteFinalizedMsgCount(ctx) if errors.Is(err, redis.Nil) { var keys []string - for msg := finalized; ; msg-- { + for msg := finalized; msg > 0; msg-- { exists, err := c.Client.Exists(ctx, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)).Result() if exists == 0 || err != nil { break @@ -543,11 +543,11 @@ func (c *SeqCoordinator) deleteFinalizedMsgsFromRedis(ctx context.Context, final } else if err != nil { return fmt.Errorf("error getting finalizedMsgCount value from redis: %w", err) } - remoteMsgCount, err := c.GetRemoteMsgCount() + remoteMsgCount, err := c.getRemoteMsgCountImpl(ctx, c.Client) if err != nil { return fmt.Errorf("cannot get remote message count: %w", err) } - msgToDelete := min(finalized, remoteMsgCount) + msgToDelete := min(finalized, remoteMsgCount-1) if prevFinalized < msgToDelete { var keys []string for msg := prevFinalized + 1; msg <= msgToDelete; msg++ { @@ -604,7 +604,7 @@ func (c *SeqCoordinator) update(ctx context.Context) time.Duration { var messages []arbostypes.MessageWithMetadata msgToRead := localMsgCount var msgReadErr error - for msgToRead < readUntil && localMsgCount >= remoteFinalizedMsgCount { + for msgToRead < readUntil && localMsgCount > remoteFinalizedMsgCount { var resString string resString, msgReadErr = c.Client.Get(ctx, redisutil.MessageKeyFor(msgToRead)).Result() if msgReadErr != nil { diff --git a/arbnode/seq_coordinator_atomic_test.go b/arbnode/seq_coordinator_test.go similarity index 60% rename from arbnode/seq_coordinator_atomic_test.go rename to arbnode/seq_coordinator_test.go index 61468a3adb..6fa08ce7a1 100644 --- a/arbnode/seq_coordinator_atomic_test.go +++ b/arbnode/seq_coordinator_test.go @@ -156,3 +156,87 @@ func TestRedisSeqCoordinatorAtomic(t *testing.T) { } } + +func TestSeqCoordinatorDeletesFinalizedMessages(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + coordConfig := TestSeqCoordinatorConfig + coordConfig.LockoutDuration = time.Millisecond * 100 + coordConfig.LockoutSpare = time.Millisecond * 10 + coordConfig.Signer.ECDSA.AcceptSequencer = false + coordConfig.Signer.SymmetricFallback = true + coordConfig.Signer.SymmetricSign = true + coordConfig.Signer.Symmetric.Dangerous.DisableSignatureVerification = true + coordConfig.Signer.Symmetric.SigningKey = "" + + nullSigner, err := signature.NewSignVerify(&coordConfig.Signer, nil, nil) + Require(t, err) + + redisUrl := redisutil.CreateTestRedis(ctx, t) + coordConfig.RedisUrl = redisUrl + + config := coordConfig + config.MyUrl = "test" + redisCoordinator, err := redisutil.NewRedisCoordinator(config.RedisUrl) + Require(t, err) + coordinator := &SeqCoordinator{ + RedisCoordinator: *redisCoordinator, + config: config, + signer: nullSigner, + } + + // Add messages to redis + var keys []string + msgBytes, err := coordinator.msgCountToSignedBytes(0) + Require(t, err) + for i := arbutil.MessageIndex(1); i <= 10; i++ { + err = coordinator.Client.Set(ctx, redisutil.MessageKeyFor(i), msgBytes, time.Hour).Err() + Require(t, err) + err = coordinator.Client.Set(ctx, redisutil.MessageSigKeyFor(i), msgBytes, time.Hour).Err() + Require(t, err) + keys = append(keys, redisutil.MessageKeyFor(i), redisutil.MessageSigKeyFor(i)) + } + // Set msgCount key + msgCountBytes, err := coordinator.msgCountToSignedBytes(11) + Require(t, err) + err = coordinator.Client.Set(ctx, redisutil.MSG_COUNT_KEY, msgCountBytes, time.Hour).Err() + Require(t, err) + exists, err := coordinator.Client.Exists(ctx, keys...).Result() + Require(t, err) + if exists != 20 { + t.Fatal("couldn't find all messages and signatures in redis") + } + + // Set finalizedMsgCount and delete finalized messages + err = coordinator.deleteFinalizedMsgsFromRedis(ctx, 5) + Require(t, err) + + // Check if messages and signatures were deleted successfully + exists, err = coordinator.Client.Exists(ctx, keys[:10]...).Result() + Require(t, err) + if exists != 0 { + t.Fatal("finalized messages and signatures in range 1 to 5 were not deleted") + } + + // Check if finalizedMsgCount was set to correct value + finalized, err := coordinator.getRemoteFinalizedMsgCount(ctx) + Require(t, err) + if finalized != 5 { + t.Fatalf("incorrect finalizedMsgCount, want: 5, have: %d", finalized) + } + + // Try deleting finalized messages when theres already a finalizedMsgCount + err = coordinator.deleteFinalizedMsgsFromRedis(ctx, 7) + Require(t, err) + exists, err = coordinator.Client.Exists(ctx, keys[10:14]...).Result() + Require(t, err) + if exists != 0 { + t.Fatal("finalized messages and signatures in range 6 to 7 were not deleted") + } + finalized, err = coordinator.getRemoteFinalizedMsgCount(ctx) + Require(t, err) + if finalized != 7 { + t.Fatalf("incorrect finalizedMsgCount, want: 7, have: %d", finalized) + } +} From e6cd9a4c199d43a95ab6c06134697073d6914800 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Tue, 16 Jul 2024 11:18:35 -0500 Subject: [PATCH 42/58] minor bug fix- finalizedMsgCount should be non-inclusive --- arbnode/seq_coordinator.go | 8 ++++---- arbnode/seq_coordinator_test.go | 15 +++++++++++---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/arbnode/seq_coordinator.go b/arbnode/seq_coordinator.go index 53d430e584..0e1344cb08 100644 --- a/arbnode/seq_coordinator.go +++ b/arbnode/seq_coordinator.go @@ -487,7 +487,7 @@ func (c *SeqCoordinator) updateWithLockout(ctx context.Context, nextChosen strin finalized, err := c.sync.GetFinalizedMsgCount(ctx) if err != nil { log.Warn("Error getting finalizedMessageCount from syncMonitor: %w", err) - } else if finalized != 0 { + } else if finalized == 0 { log.Warn("SyncMonitor returned zero finalizedMessageCount") } else if err := c.deleteFinalizedMsgsFromRedis(ctx, finalized); err != nil { log.Warn("Coordinator failed to delete finalized messages from redis", "err", err) @@ -525,7 +525,7 @@ func (c *SeqCoordinator) deleteFinalizedMsgsFromRedis(ctx context.Context, final prevFinalized, err := c.getRemoteFinalizedMsgCount(ctx) if errors.Is(err, redis.Nil) { var keys []string - for msg := finalized; msg > 0; msg-- { + for msg := finalized - 1; msg > 0; msg-- { exists, err := c.Client.Exists(ctx, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)).Result() if exists == 0 || err != nil { break @@ -547,10 +547,10 @@ func (c *SeqCoordinator) deleteFinalizedMsgsFromRedis(ctx context.Context, final if err != nil { return fmt.Errorf("cannot get remote message count: %w", err) } - msgToDelete := min(finalized, remoteMsgCount-1) + msgToDelete := min(finalized, remoteMsgCount) if prevFinalized < msgToDelete { var keys []string - for msg := prevFinalized + 1; msg <= msgToDelete; msg++ { + for msg := prevFinalized; msg < msgToDelete; msg++ { keys = append(keys, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)) } if err := c.Client.Del(ctx, keys...).Err(); err != nil { diff --git a/arbnode/seq_coordinator_test.go b/arbnode/seq_coordinator_test.go index 64feeff29c..6498543f3a 100644 --- a/arbnode/seq_coordinator_test.go +++ b/arbnode/seq_coordinator_test.go @@ -213,10 +213,10 @@ func TestSeqCoordinatorDeletesFinalizedMessages(t *testing.T) { Require(t, err) // Check if messages and signatures were deleted successfully - exists, err = coordinator.Client.Exists(ctx, keys[:10]...).Result() + exists, err = coordinator.Client.Exists(ctx, keys[:8]...).Result() Require(t, err) if exists != 0 { - t.Fatal("finalized messages and signatures in range 1 to 5 were not deleted") + t.Fatal("finalized messages and signatures in range 1 to 4 were not deleted") } // Check if finalizedMsgCount was set to correct value @@ -229,14 +229,21 @@ func TestSeqCoordinatorDeletesFinalizedMessages(t *testing.T) { // Try deleting finalized messages when theres already a finalizedMsgCount err = coordinator.deleteFinalizedMsgsFromRedis(ctx, 7) Require(t, err) - exists, err = coordinator.Client.Exists(ctx, keys[10:14]...).Result() + exists, err = coordinator.Client.Exists(ctx, keys[8:12]...).Result() Require(t, err) if exists != 0 { - t.Fatal("finalized messages and signatures in range 6 to 7 were not deleted") + t.Fatal("finalized messages and signatures in range 5 to 6 were not deleted") } finalized, err = coordinator.getRemoteFinalizedMsgCount(ctx) Require(t, err) if finalized != 7 { t.Fatalf("incorrect finalizedMsgCount, want: 7, have: %d", finalized) } + + // Check that non-finalized messages are still available in redis + exists, err = coordinator.Client.Exists(ctx, keys[12:]...).Result() + Require(t, err) + if exists != 8 { + t.Fatal("non-finalized messages and signatures in range 7 to 10 are not fully available") + } } From 08c7e9e67df4c719077526c31c901d667541b3b3 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Wed, 31 Jul 2024 11:43:32 +0530 Subject: [PATCH 43/58] code refactor --- arbnode/seq_coordinator.go | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arbnode/seq_coordinator.go b/arbnode/seq_coordinator.go index 0e1344cb08..bc2ef93991 100644 --- a/arbnode/seq_coordinator.go +++ b/arbnode/seq_coordinator.go @@ -512,7 +512,12 @@ func (c *SeqCoordinator) updateWithLockout(ctx context.Context, nextChosen strin } func (c *SeqCoordinator) deleteFinalizedMsgsFromRedis(ctx context.Context, finalized arbutil.MessageIndex) error { - updateFinalizedMsgCount := func() error { + deleteMsgsAndUpdateFinalizedMsgCount := func(keys []string) error { + if len(keys) > 0 { + if err := c.Client.Del(ctx, keys...).Err(); err != nil { + return fmt.Errorf("error deleting finalized messages and their signatures from redis: %w", err) + } + } finalizedBytes, err := c.msgCountToSignedBytes(finalized) if err != nil { return err @@ -527,19 +532,17 @@ func (c *SeqCoordinator) deleteFinalizedMsgsFromRedis(ctx context.Context, final var keys []string for msg := finalized - 1; msg > 0; msg-- { exists, err := c.Client.Exists(ctx, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)).Result() - if exists == 0 || err != nil { + if err != nil { + // If there is an error deleting finalized messages during init, we retry later either from this sequencer or from another + return err + } + if exists == 0 { break } keys = append(keys, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)) } - // If there is an error deleting finalized messages during init, we retry later either from this sequencer or from another - if len(keys) > 0 { - log.Info("Initializing finalizedMsgCount and deleting finalized messages from redis", "finalizedMsgCount", finalized) - if err := c.Client.Del(ctx, keys...).Err(); err != nil { - return fmt.Errorf("error deleting finalized message and their signatures from redis during init of finalizedMsgCount: %w", err) - } - } - return updateFinalizedMsgCount() + log.Info("Initializing finalizedMsgCount and deleting finalized messages from redis", "finalizedMsgCount", finalized) + return deleteMsgsAndUpdateFinalizedMsgCount(keys) } else if err != nil { return fmt.Errorf("error getting finalizedMsgCount value from redis: %w", err) } @@ -553,10 +556,7 @@ func (c *SeqCoordinator) deleteFinalizedMsgsFromRedis(ctx context.Context, final for msg := prevFinalized; msg < msgToDelete; msg++ { keys = append(keys, redisutil.MessageKeyFor(msg), redisutil.MessageSigKeyFor(msg)) } - if err := c.Client.Del(ctx, keys...).Err(); err != nil { - return fmt.Errorf("error deleting finalized message and their signatures from redis: %w", err) - } - return updateFinalizedMsgCount() + return deleteMsgsAndUpdateFinalizedMsgCount(keys) } return nil } From 379bf7e50ee71b203dae71dd12273062ba6f3dcc Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Thu, 1 Aug 2024 21:08:37 +0530 Subject: [PATCH 44/58] minor fix --- arbnode/seq_coordinator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arbnode/seq_coordinator.go b/arbnode/seq_coordinator.go index bc2ef93991..dad271c168 100644 --- a/arbnode/seq_coordinator.go +++ b/arbnode/seq_coordinator.go @@ -604,7 +604,7 @@ func (c *SeqCoordinator) update(ctx context.Context) time.Duration { var messages []arbostypes.MessageWithMetadata msgToRead := localMsgCount var msgReadErr error - for msgToRead < readUntil && localMsgCount > remoteFinalizedMsgCount { + for msgToRead < readUntil && localMsgCount >= remoteFinalizedMsgCount { var resString string resString, msgReadErr = c.Client.Get(ctx, redisutil.MessageKeyFor(msgToRead)).Result() if msgReadErr != nil { From 21f2ee4a8ae2f0a4eb39b59fa389e1fa24b84f52 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 7 Aug 2024 16:19:07 +0200 Subject: [PATCH 45/58] enable archive mode for HashScheme only in db conversion system test --- system_tests/db_conversion_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/system_tests/db_conversion_test.go b/system_tests/db_conversion_test.go index b811f40347..3c0b38ca73 100644 --- a/system_tests/db_conversion_test.go +++ b/system_tests/db_conversion_test.go @@ -9,6 +9,7 @@ import ( "testing" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/trie" "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" @@ -20,7 +21,9 @@ func TestDatabaseConversion(t *testing.T) { builder := NewNodeBuilder(ctx).DefaultConfig(t, true) builder.l2StackConfig.DBEngine = "leveldb" builder.l2StackConfig.Name = "testl2" - builder.execConfig.Caching.Archive = true + if builder.execConfig.Caching.StateScheme == rawdb.HashScheme { + builder.execConfig.Caching.Archive = true + } _ = builder.Build(t) dataDir := builder.dataDir l2CleanupDone := false From 4848a05a60bf0a70e5e63908dab1bc7f67e90977 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Thu, 8 Aug 2024 16:37:09 +0530 Subject: [PATCH 46/58] address PR comments --- arbnode/seq_coordinator.go | 70 +++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/arbnode/seq_coordinator.go b/arbnode/seq_coordinator.go index dad271c168..e3eb9f1df1 100644 --- a/arbnode/seq_coordinator.go +++ b/arbnode/seq_coordinator.go @@ -70,9 +70,10 @@ type SeqCoordinatorConfig struct { SafeShutdownDelay time.Duration `koanf:"safe-shutdown-delay"` ReleaseRetries int `koanf:"release-retries"` // Max message per poll. - MsgPerPoll arbutil.MessageIndex `koanf:"msg-per-poll"` - MyUrl string `koanf:"my-url"` - Signer signature.SignVerifyConfig `koanf:"signer"` + MsgPerPoll arbutil.MessageIndex `koanf:"msg-per-poll"` + MyUrl string `koanf:"my-url"` + DeleteFinalizedMsgs bool `koanf:"delete-finalized-msgs"` + Signer signature.SignVerifyConfig `koanf:"signer"` } func (c *SeqCoordinatorConfig) Url() string { @@ -96,6 +97,7 @@ func SeqCoordinatorConfigAddOptions(prefix string, f *flag.FlagSet) { f.Int(prefix+".release-retries", DefaultSeqCoordinatorConfig.ReleaseRetries, "the number of times to retry releasing the wants lockout and chosen one status on shutdown") f.Uint64(prefix+".msg-per-poll", uint64(DefaultSeqCoordinatorConfig.MsgPerPoll), "will only be marked as wanting the lockout if not too far behind") f.String(prefix+".my-url", DefaultSeqCoordinatorConfig.MyUrl, "url for this sequencer if it is the chosen") + f.Bool(prefix+".delete-finalized-msgs", DefaultSeqCoordinatorConfig.DeleteFinalizedMsgs, "enable deleting of finalized messages from redis") signature.SignVerifyConfigAddOptions(prefix+".signer", f) } @@ -113,23 +115,25 @@ var DefaultSeqCoordinatorConfig = SeqCoordinatorConfig{ RetryInterval: 50 * time.Millisecond, MsgPerPoll: 2000, MyUrl: redisutil.INVALID_URL, + DeleteFinalizedMsgs: true, Signer: signature.DefaultSignVerifyConfig, } var TestSeqCoordinatorConfig = SeqCoordinatorConfig{ - Enable: false, - RedisUrl: "", - LockoutDuration: time.Second * 2, - LockoutSpare: time.Millisecond * 10, - SeqNumDuration: time.Minute * 10, - UpdateInterval: time.Millisecond * 10, - HandoffTimeout: time.Millisecond * 200, - SafeShutdownDelay: time.Millisecond * 100, - ReleaseRetries: 4, - RetryInterval: time.Millisecond * 3, - MsgPerPoll: 20, - MyUrl: redisutil.INVALID_URL, - Signer: signature.DefaultSignVerifyConfig, + Enable: false, + RedisUrl: "", + LockoutDuration: time.Second * 2, + LockoutSpare: time.Millisecond * 10, + SeqNumDuration: time.Minute * 10, + UpdateInterval: time.Millisecond * 10, + HandoffTimeout: time.Millisecond * 200, + SafeShutdownDelay: time.Millisecond * 100, + ReleaseRetries: 4, + RetryInterval: time.Millisecond * 3, + MsgPerPoll: 20, + MyUrl: redisutil.INVALID_URL, + DeleteFinalizedMsgs: true, + Signer: signature.DefaultSignVerifyConfig, } func NewSeqCoordinator( @@ -483,14 +487,16 @@ func (c *SeqCoordinator) updateWithLockout(ctx context.Context, nextChosen strin return c.noRedisError() } // Was, and still is, the active sequencer - // Before proceeding, first try deleting finalized messages from redis and setting the finalizedMsgCount key - finalized, err := c.sync.GetFinalizedMsgCount(ctx) - if err != nil { - log.Warn("Error getting finalizedMessageCount from syncMonitor: %w", err) - } else if finalized == 0 { - log.Warn("SyncMonitor returned zero finalizedMessageCount") - } else if err := c.deleteFinalizedMsgsFromRedis(ctx, finalized); err != nil { - log.Warn("Coordinator failed to delete finalized messages from redis", "err", err) + if c.config.DeleteFinalizedMsgs { + // Before proceeding, first try deleting finalized messages from redis and setting the finalizedMsgCount key + finalized, err := c.sync.GetFinalizedMsgCount(ctx) + if err != nil { + log.Warn("Error getting finalizedMessageCount from syncMonitor: %w", err) + } else if finalized == 0 { + log.Warn("SyncMonitor returned zero finalizedMessageCount") + } else if err := c.deleteFinalizedMsgsFromRedis(ctx, finalized); err != nil { + log.Warn("Coordinator failed to delete finalized messages from redis", "err", err) + } } // We leave a margin of error of either a five times the update interval or a fifth of the lockout duration, whichever is greater. marginOfError := arbmath.MaxInt(c.config.LockoutDuration/5, c.config.UpdateInterval*5) @@ -514,8 +520,14 @@ func (c *SeqCoordinator) updateWithLockout(ctx context.Context, nextChosen strin func (c *SeqCoordinator) deleteFinalizedMsgsFromRedis(ctx context.Context, finalized arbutil.MessageIndex) error { deleteMsgsAndUpdateFinalizedMsgCount := func(keys []string) error { if len(keys) > 0 { - if err := c.Client.Del(ctx, keys...).Err(); err != nil { - return fmt.Errorf("error deleting finalized messages and their signatures from redis: %w", err) + // To support cases during init we delete keys from reverse (i.e lowest seq num first), so that even if deletion fails in one of the iterations + // next time deleteFinalizedMsgsFromRedis is called we dont miss undeleted messages, as exists is checked from higher seqnum to lower. + // In non-init cases it doesn't matter how we delete as we always try to delete from prevFinalized to finalized + batchDeleteCount := 1000 + for i := len(keys); i > 0; i -= batchDeleteCount { + if err := c.Client.Del(ctx, keys[max(0, i-batchDeleteCount):i]...).Err(); err != nil { + return fmt.Errorf("error deleting finalized messages and their signatures from redis: %w", err) + } } } finalizedBytes, err := c.msgCountToSignedBytes(finalized) @@ -593,7 +605,11 @@ func (c *SeqCoordinator) update(ctx context.Context) time.Duration { } remoteFinalizedMsgCount, err := c.getRemoteFinalizedMsgCount(ctx) if err != nil { - log.Warn("Cannot get remote finalized message count, might encounter failed to read message warnings later", "err", err) + loglevel := log.Error + if err == redis.Nil { + loglevel = log.Debug + } + loglevel("Cannot get remote finalized message count, might encounter failed to read message warnings later", "err", err) } remoteMsgCount, err := c.GetRemoteMsgCount() if err != nil { From 45eaee937046d45ee52334f879ec66055c4d585c Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Thu, 8 Aug 2024 16:39:33 +0530 Subject: [PATCH 47/58] fix lint --- arbnode/seq_coordinator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arbnode/seq_coordinator.go b/arbnode/seq_coordinator.go index e3eb9f1df1..a582b64ffa 100644 --- a/arbnode/seq_coordinator.go +++ b/arbnode/seq_coordinator.go @@ -606,7 +606,7 @@ func (c *SeqCoordinator) update(ctx context.Context) time.Duration { remoteFinalizedMsgCount, err := c.getRemoteFinalizedMsgCount(ctx) if err != nil { loglevel := log.Error - if err == redis.Nil { + if errors.Is(err, redis.Nil) { loglevel = log.Debug } loglevel("Cannot get remote finalized message count, might encounter failed to read message warnings later", "err", err) From 7895656eea9a148a843e68935920d8c7829eea17 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 13 Aug 2024 15:11:44 +0200 Subject: [PATCH 48/58] check for canary key when initializing databases --- cmd/conf/init.go | 3 +-- cmd/dbconv/dbconv/config.go | 5 ++-- cmd/dbconv/dbconv/dbconv.go | 31 ++++++++++++++++--------- cmd/nitro/init.go | 22 ++++++++---------- cmd/nitro/init_test.go | 32 -------------------------- cmd/nitro/nitro.go | 5 ++++ execution/gethexec/node.go | 11 +++++++-- util/dbutil/dbutil.go | 16 +++++++++++++ util/dbutil/dbutil_test.go | 46 +++++++++++++++++++++++++++++++++++++ 9 files changed, 109 insertions(+), 62 deletions(-) create mode 100644 util/dbutil/dbutil_test.go diff --git a/cmd/conf/init.go b/cmd/conf/init.go index a3b5504077..d88bcdd241 100644 --- a/cmd/conf/init.go +++ b/cmd/conf/init.go @@ -7,7 +7,6 @@ import ( "time" "github.com/ethereum/go-ethereum/log" - "github.com/offchainlabs/nitro/execution/gethexec" "github.com/spf13/pflag" ) @@ -55,7 +54,7 @@ var InitConfigDefault = InitConfig{ Prune: "", PruneBloomSize: 2048, PruneThreads: runtime.NumCPU(), - PruneTrieCleanCache: gethexec.DefaultCachingConfig.TrieCleanCache, + PruneTrieCleanCache: 600, RecreateMissingStateFrom: 0, // 0 = disabled RebuildLocalWasm: true, ReorgToBatch: -1, diff --git a/cmd/dbconv/dbconv/config.go b/cmd/dbconv/dbconv/config.go index 7cfb0cbd31..74623bc264 100644 --- a/cmd/dbconv/dbconv/config.go +++ b/cmd/dbconv/dbconv/config.go @@ -6,7 +6,6 @@ import ( "github.com/offchainlabs/nitro/cmd/conf" "github.com/offchainlabs/nitro/cmd/genericconf" - "github.com/offchainlabs/nitro/execution/gethexec" flag "github.com/spf13/pflag" ) @@ -22,7 +21,7 @@ type DBConfig struct { var DBConfigDefaultDst = DBConfig{ DBEngine: "pebble", Handles: conf.PersistentConfigDefault.Handles, - Cache: gethexec.DefaultCachingConfig.DatabaseCache, + Cache: 2048, // 2048 MB Namespace: "dstdb/", Pebble: conf.PebbleConfigDefault, } @@ -30,7 +29,7 @@ var DBConfigDefaultDst = DBConfig{ var DBConfigDefaultSrc = DBConfig{ DBEngine: "leveldb", Handles: conf.PersistentConfigDefault.Handles, - Cache: gethexec.DefaultCachingConfig.DatabaseCache, + Cache: 2048, // 2048 MB Namespace: "srcdb/", } diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index e8f6c61802..a21bed13d9 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -3,6 +3,7 @@ package dbconv import ( "bytes" "context" + "errors" "fmt" "time" @@ -11,7 +12,18 @@ import ( "github.com/ethereum/go-ethereum/log" ) -var UnfinishedConversionCanaryKey = []byte("unfinished-conversion-canary-key") +var unfinishedConversionCanaryKey = []byte("unfinished-conversion-canary-key") + +func UnfinishedConversionCheck(db ethdb.KeyValueStore) error { + unfinished, err := db.Has(unfinishedConversionCanaryKey) + if err != nil { + return fmt.Errorf("Failed to check UnfinishedConversionCanaryKey existence: %w", err) + } + if unfinished { + return errors.New("Unfinished conversion canary key detected") + } + return nil +} type DBConverter struct { config *DBConvConfig @@ -40,16 +52,13 @@ func openDB(config *DBConfig, name string, readonly bool) (ethdb.Database, error if err != nil { return nil, err } - unfinished, err := db.Has(UnfinishedConversionCanaryKey) - if err != nil { - return nil, fmt.Errorf("Failed to check canary key existence: %w", err) - } - if unfinished { - if err := db.Close(); err != nil { - return nil, fmt.Errorf("Unfinished conversion canary key detected and failed to close: %w", err) + if err := UnfinishedConversionCheck(db); err != nil { + if closeErr := db.Close(); closeErr != nil { + err = errors.Join(err, closeErr) } - return nil, fmt.Errorf("Unfinished conversion canary key detected") + return nil, err } + return db, nil } @@ -67,7 +76,7 @@ func (c *DBConverter) Convert(ctx context.Context) error { defer dst.Close() c.stats.Reset() log.Info("Converting database", "src", c.config.Src.Data, "dst", c.config.Dst.Data, "db-engine", c.config.Dst.DBEngine) - if err = dst.Put(UnfinishedConversionCanaryKey, []byte{1}); err != nil { + if err = dst.Put(unfinishedConversionCanaryKey, []byte{1}); err != nil { return err } it := src.NewIterator(nil, nil) @@ -98,7 +107,7 @@ func (c *DBConverter) Convert(ctx context.Context) error { c.stats.LogBytes(int64(batchSize)) } if err == nil { - if err = dst.Delete(UnfinishedConversionCanaryKey); err != nil { + if err = dst.Delete(unfinishedConversionCanaryKey); err != nil { return err } } diff --git a/cmd/nitro/init.go b/cmd/nitro/init.go index a958572458..3d24d94313 100644 --- a/cmd/nitro/init.go +++ b/cmd/nitro/init.go @@ -40,12 +40,14 @@ import ( "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/cmd/chaininfo" "github.com/offchainlabs/nitro/cmd/conf" + "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" "github.com/offchainlabs/nitro/cmd/ipfshelper" "github.com/offchainlabs/nitro/cmd/pruning" "github.com/offchainlabs/nitro/cmd/staterecovery" "github.com/offchainlabs/nitro/execution/gethexec" "github.com/offchainlabs/nitro/statetransfer" "github.com/offchainlabs/nitro/util/arbmath" + "github.com/offchainlabs/nitro/util/dbutil" ) var notFoundError = errors.New("file not found") @@ -396,16 +398,6 @@ func checkEmptyDatabaseDir(dir string, force bool) error { return nil } -var pebbleNotExistErrorRegex = regexp.MustCompile("pebble: database .* does not exist") - -func isPebbleNotExistError(err error) bool { - return pebbleNotExistErrorRegex.MatchString(err.Error()) -} - -func isLeveldbNotExistError(err error) bool { - return os.IsNotExist(err) -} - func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeConfig, chainId *big.Int, cacheConfig *core.CacheConfig, persistentConfig *conf.PersistentConfig, l1Client arbutil.L1Interface, rollupAddrs chaininfo.RollupAddresses) (ethdb.Database, *core.BlockChain, error) { if !config.Init.Force { if readOnlyDb, err := stack.OpenDatabaseWithFreezerWithExtraOptions("l2chaindata", 0, 0, config.Persistent.Ancient, "l2chaindata/", true, persistentConfig.Pebble.ExtraOptions("l2chaindata")); err == nil { @@ -418,10 +410,16 @@ func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeCo if err != nil { return nil, nil, err } + if err := dbconv.UnfinishedConversionCheck(chainData); err != nil { + return nil, nil, fmt.Errorf("l2chaindata unfinished database conversion check error: %w", err) + } wasmDb, err := stack.OpenDatabaseWithExtraOptions("wasm", config.Execution.Caching.DatabaseCache, config.Persistent.Handles, "wasm/", false, persistentConfig.Pebble.ExtraOptions("wasm")) if err != nil { return nil, nil, err } + if err := dbconv.UnfinishedConversionCheck(wasmDb); err != nil { + return nil, nil, fmt.Errorf("wasm unfinished database conversion check error: %w", err) + } chainDb := rawdb.WrapDatabaseWithWasm(chainData, wasmDb, 1) _, err = rawdb.ParseStateScheme(cacheConfig.StateScheme, chainDb) if err != nil { @@ -480,8 +478,8 @@ func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeCo return chainDb, l2BlockChain, nil } readOnlyDb.Close() - } else if !isLeveldbNotExistError(err) && !isPebbleNotExistError(err) { - // we only want to continue if the error is pebble or leveldb not exist error + } else if !dbutil.IsNotExistError(err) { + // we only want to continue if the database does not exist return nil, nil, fmt.Errorf("Failed to open database: %w", err) } } diff --git a/cmd/nitro/init_test.go b/cmd/nitro/init_test.go index 0797ac9b46..95a4b208d4 100644 --- a/cmd/nitro/init_test.go +++ b/cmd/nitro/init_test.go @@ -286,38 +286,6 @@ func startFileServer(t *testing.T, ctx context.Context, dir string) string { return addr } -func testIsNotExistError(t *testing.T, dbEngine string, isNotExist func(error) bool) { - stackConf := node.DefaultConfig - stackConf.DataDir = t.TempDir() - stackConf.DBEngine = dbEngine - stack, err := node.New(&stackConf) - if err != nil { - t.Fatalf("Failed to created test stack: %v", err) - } - defer stack.Close() - readonly := true - _, err = stack.OpenDatabaseWithExtraOptions("test", 16, 16, "", readonly, nil) - if err == nil { - t.Fatal("Opening non-existent database did not fail") - } - if !isNotExist(err) { - t.Fatalf("Failed to classify error as not exist error - internal implementation of OpenDatabaseWithExtraOptions might have changed, err: %v", err) - } - err = errors.New("some other error") - if isNotExist(err) { - t.Fatalf("Classified other error as not exist, err: %v", err) - } -} - -func TestIsNotExistError(t *testing.T) { - t.Run("TestIsPebbleNotExistError", func(t *testing.T) { - testIsNotExistError(t, "pebble", isPebbleNotExistError) - }) - t.Run("TestIsLeveldbNotExistError", func(t *testing.T) { - testIsNotExistError(t, "leveldb", isLeveldbNotExistError) - }) -} - func TestEmptyDatabaseDir(t *testing.T) { testCases := []struct { name string diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index 2c7d07cf3b..56e830c2a1 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -50,6 +50,7 @@ import ( blocksreexecutor "github.com/offchainlabs/nitro/blocks_reexecutor" "github.com/offchainlabs/nitro/cmd/chaininfo" "github.com/offchainlabs/nitro/cmd/conf" + "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" "github.com/offchainlabs/nitro/cmd/genericconf" "github.com/offchainlabs/nitro/cmd/util" "github.com/offchainlabs/nitro/cmd/util/confighelpers" @@ -494,6 +495,10 @@ func mainImpl() int { log.Error("database is corrupt; delete it and try again", "database-directory", stack.InstanceDir()) return 1 } + if err := dbconv.UnfinishedConversionCheck(arbDb); err != nil { + log.Error("arbitrumdata unfinished conversion check error", "err", err) + return 1 + } fatalErrChan := make(chan error, 10) diff --git a/execution/gethexec/node.go b/execution/gethexec/node.go index 6624188cbd..b5a7d855f0 100644 --- a/execution/gethexec/node.go +++ b/execution/gethexec/node.go @@ -20,8 +20,10 @@ import ( "github.com/ethereum/go-ethereum/rpc" "github.com/offchainlabs/nitro/arbos/arbostypes" "github.com/offchainlabs/nitro/arbutil" + "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" "github.com/offchainlabs/nitro/execution" "github.com/offchainlabs/nitro/solgen/go/precompilesgen" + "github.com/offchainlabs/nitro/util/dbutil" "github.com/offchainlabs/nitro/util/headerreader" flag "github.com/spf13/pflag" ) @@ -181,11 +183,16 @@ func CreateExecutionNode( var classicOutbox *ClassicOutboxRetriever if l2BlockChain.Config().ArbitrumChainParams.GenesisBlockNum > 0 { - classicMsgDb, err := stack.OpenDatabase("classic-msg", 0, 0, "classicmsg/", true) // TODO can we skip using ExtraOptions here? - if err != nil { + classicMsgDb, err := stack.OpenDatabase("classic-msg", 0, 0, "classicmsg/", true) + if dbutil.IsNotExistError(err) { log.Warn("Classic Msg Database not found", "err", err) classicOutbox = nil + } else if err != nil { + return nil, fmt.Errorf("Failed to open classic-msg database: %w", err) } else { + if err := dbconv.UnfinishedConversionCheck(classicMsgDb); err != nil { + return nil, fmt.Errorf("classic-msg unfinished database conversion check error: %w", err) + } classicOutbox = NewClassicOutboxRetriever(classicMsgDb) } } diff --git a/util/dbutil/dbutil.go b/util/dbutil/dbutil.go index a1eb6ce208..426493c776 100644 --- a/util/dbutil/dbutil.go +++ b/util/dbutil/dbutil.go @@ -5,6 +5,8 @@ package dbutil import ( "errors" + "os" + "regexp" "github.com/cockroachdb/pebble" "github.com/ethereum/go-ethereum/ethdb/memorydb" @@ -14,3 +16,17 @@ import ( func IsErrNotFound(err error) bool { return errors.Is(err, leveldb.ErrNotFound) || errors.Is(err, pebble.ErrNotFound) || errors.Is(err, memorydb.ErrMemorydbNotFound) } + +var pebbleNotExistErrorRegex = regexp.MustCompile("pebble: database .* does not exist") + +func isPebbleNotExistError(err error) bool { + return pebbleNotExistErrorRegex.MatchString(err.Error()) +} + +func isLeveldbNotExistError(err error) bool { + return os.IsNotExist(err) +} + +func IsNotExistError(err error) bool { + return isLeveldbNotExistError(err) || isPebbleNotExistError(err) +} diff --git a/util/dbutil/dbutil_test.go b/util/dbutil/dbutil_test.go new file mode 100644 index 0000000000..b28f8a2c23 --- /dev/null +++ b/util/dbutil/dbutil_test.go @@ -0,0 +1,46 @@ +package dbutil + +import ( + "errors" + "testing" + + "github.com/ethereum/go-ethereum/node" +) + +func testIsNotExistError(t *testing.T, dbEngine string, isNotExist func(error) bool) { + stackConf := node.DefaultConfig + stackConf.DataDir = t.TempDir() + stackConf.DBEngine = dbEngine + stack, err := node.New(&stackConf) + if err != nil { + t.Fatalf("Failed to created test stack: %v", err) + } + defer stack.Close() + readonly := true + _, err = stack.OpenDatabaseWithExtraOptions("test", 16, 16, "", readonly, nil) + if err == nil { + t.Fatal("Opening non-existent database did not fail") + } + if !isNotExist(err) { + t.Fatalf("Failed to classify error as not exist error - internal implementation of OpenDatabaseWithExtraOptions might have changed, err: %v", err) + } + err = errors.New("some other error") + if isNotExist(err) { + t.Fatalf("Classified other error as not exist, err: %v", err) + } +} + +func TestIsNotExistError(t *testing.T) { + t.Run("TestIsPebbleNotExistError", func(t *testing.T) { + testIsNotExistError(t, "pebble", isPebbleNotExistError) + }) + t.Run("TestIsLeveldbNotExistError", func(t *testing.T) { + testIsNotExistError(t, "leveldb", isLeveldbNotExistError) + }) + t.Run("TestIsNotExistErrorWithPebble", func(t *testing.T) { + testIsNotExistError(t, "pebble", IsNotExistError) + }) + t.Run("TestIsNotExistErrorWithLeveldb", func(t *testing.T) { + testIsNotExistError(t, "leveldb", IsNotExistError) + }) +} From 8ec838997dd091cb1d45e3003ceec40713d10dca Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 13 Aug 2024 15:40:31 +0200 Subject: [PATCH 49/58] fix db_conversion_test for PathScheme --- system_tests/db_conversion_test.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/system_tests/db_conversion_test.go b/system_tests/db_conversion_test.go index 3c0b38ca73..157976c293 100644 --- a/system_tests/db_conversion_test.go +++ b/system_tests/db_conversion_test.go @@ -21,6 +21,7 @@ func TestDatabaseConversion(t *testing.T) { builder := NewNodeBuilder(ctx).DefaultConfig(t, true) builder.l2StackConfig.DBEngine = "leveldb" builder.l2StackConfig.Name = "testl2" + // currently only HashScheme supports archive mode if builder.execConfig.Caching.StateScheme == rawdb.HashScheme { builder.execConfig.Caching.Archive = true } @@ -78,9 +79,15 @@ func TestDatabaseConversion(t *testing.T) { if current == nil { Fatal(t, "failed to get current block header") } + triedb := bc.StateCache().TrieDB() visited := 0 - for i := uint64(0); i <= current.Number.Uint64(); i++ { + i := uint64(0) + // don't query historical blocks when PathSchem is used + if builder.execConfig.Caching.StateScheme == rawdb.PathScheme { + i = current.Number.Uint64() + } + for ; i <= current.Number.Uint64(); i++ { header := bc.GetHeaderByNumber(i) _, err := bc.StateAt(header.Root) Require(t, err) From ee7cba8c494cdb07a0f030d8297f29dc6d1775c4 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 13 Aug 2024 16:03:17 +0200 Subject: [PATCH 50/58] convert-databases: by default on conversion failure remove only unfinished database --- scripts/convert-databases.bash | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/convert-databases.bash b/scripts/convert-databases.bash index 7e0e7aba14..c1a7f69666 100755 --- a/scripts/convert-databases.bash +++ b/scripts/convert-databases.bash @@ -8,7 +8,7 @@ src=$DEFAULT_SRC dst= force=false skip_existing=false -clean="all" +clean="failed" l2chaindata_status="not started" l2chaindata_ancient_status="not started" @@ -42,9 +42,9 @@ echo Usage: $0 \[OPTIONS..\] echo "--force remove destination directory if it exists" echo "--skip-existing skip convertion of databases which directories already exist in the destination directory" echo "--clean sets what should be removed in case of error, possible values:" - echo " \"all\" - remove whole destination directory (default)" - echo " \"failed\" - remove database which conversion failed" + echo " \"failed\" - remove database which conversion failed (default)" echo " \"none\" - remove nothing, leave unfinished and potentially corrupted databases" + echo " \"all\" - remove whole destination directory" } removeDir() { From 4007c9f4cd6afae88b1b51ee5b037ae74dc0331d Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Tue, 13 Aug 2024 10:38:32 -0500 Subject: [PATCH 51/58] Update dev test chains ArbOS versions in arbitrum_chain_info.json --- cmd/chaininfo/arbitrum_chain_info.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/chaininfo/arbitrum_chain_info.json b/cmd/chaininfo/arbitrum_chain_info.json index 7d47d13e84..524433a7b5 100644 --- a/cmd/chaininfo/arbitrum_chain_info.json +++ b/cmd/chaininfo/arbitrum_chain_info.json @@ -164,7 +164,7 @@ "EnableArbOS": true, "AllowDebugPrecompiles": true, "DataAvailabilityCommittee": false, - "InitialArbOSVersion": 11, + "InitialArbOSVersion": 31, "InitialChainOwner": "0x0000000000000000000000000000000000000000", "GenesisBlockNum": 0 } @@ -196,7 +196,7 @@ "EnableArbOS": true, "AllowDebugPrecompiles": true, "DataAvailabilityCommittee": true, - "InitialArbOSVersion": 11, + "InitialArbOSVersion": 31, "InitialChainOwner": "0x0000000000000000000000000000000000000000", "GenesisBlockNum": 0 } From 4ebfd7a4a3becd9b8255699ed8ecfbc2816a9a9d Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 14 Aug 2024 22:45:54 +0200 Subject: [PATCH 52/58] fix NodeBuilder.RestartL2Node - use l2StackConfig from builder --- system_tests/common_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system_tests/common_test.go b/system_tests/common_test.go index 62053c17f1..636ac2d2ef 100644 --- a/system_tests/common_test.go +++ b/system_tests/common_test.go @@ -457,7 +457,7 @@ func (b *NodeBuilder) RestartL2Node(t *testing.T) { } b.L2.cleanup() - l2info, stack, chainDb, arbDb, blockchain := createL2BlockChain(t, b.L2Info, b.dataDir, b.chainConfig, &b.execConfig.Caching) + l2info, stack, chainDb, arbDb, blockchain := createL2BlockChainWithStackConfig(t, b.L2Info, b.dataDir, b.chainConfig, nil, b.l2StackConfig, &b.execConfig.Caching) execConfigFetcher := func() *gethexec.Config { return b.execConfig } execNode, err := gethexec.CreateExecutionNode(b.ctx, stack, chainDb, blockchain, nil, execConfigFetcher) From b0484c5f9ea1e667e89635fddb13f07b09a0bd91 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 14 Aug 2024 23:04:11 +0200 Subject: [PATCH 53/58] add extra checks to db conversion system test --- system_tests/db_conversion_test.go | 53 +++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/system_tests/db_conversion_test.go b/system_tests/db_conversion_test.go index 157976c293..aca28262cb 100644 --- a/system_tests/db_conversion_test.go +++ b/system_tests/db_conversion_test.go @@ -13,6 +13,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/trie" "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" + "github.com/offchainlabs/nitro/util/arbmath" ) func TestDatabaseConversion(t *testing.T) { @@ -25,14 +26,13 @@ func TestDatabaseConversion(t *testing.T) { if builder.execConfig.Caching.StateScheme == rawdb.HashScheme { builder.execConfig.Caching.Archive = true } - _ = builder.Build(t) + cleanup := builder.Build(t) dataDir := builder.dataDir - l2CleanupDone := false + cleanupDone := false defer func() { // TODO we should be able to call cleanup twice, rn it gets stuck then - if !l2CleanupDone { - builder.L2.cleanup() + if !cleanupDone { + cleanup() } - builder.L1.cleanup() }() builder.L2Info.GenerateAccount("User2") var txs []*types.Transaction @@ -46,8 +46,13 @@ func TestDatabaseConversion(t *testing.T) { _, err := builder.L2.EnsureTxSucceeded(tx) Require(t, err) } - l2CleanupDone = true - builder.L2.cleanup() + block, err := builder.L2.Client.BlockByNumber(ctx, nil) + Require(t, err) + user2Balance := builder.L2.GetBalance(t, builder.L2Info.GetAddress("User2")) + ownerBalance := builder.L2.GetBalance(t, builder.L2Info.GetAddress("Owner")) + + cleanup() + cleanupDone = true t.Log("stopped first node") instanceDir := filepath.Join(dataDir, builder.l2StackConfig.Name) @@ -64,22 +69,31 @@ func TestDatabaseConversion(t *testing.T) { } builder.l2StackConfig.DBEngine = "pebble" - testClient, cleanup := builder.Build2ndNode(t, &SecondNodeParams{stackConfig: builder.l2StackConfig}) - defer cleanup() + builder.nodeConfig.ParentChainReader.Enable = false + builder.withL1 = false + builder.L2.cleanup = func() {} + builder.RestartL2Node(t) + t.Log("restarted the node") - t.Log("sending test tx") - tx := builder.L2Info.PrepareTx("Owner", "User2", builder.L2Info.TransferGas, common.Big1, nil) - err := testClient.Client.SendTransaction(ctx, tx) - Require(t, err) - _, err = testClient.EnsureTxSucceeded(tx) + blockAfterRestart, err := builder.L2.Client.BlockByNumber(ctx, nil) Require(t, err) + user2BalanceAfterRestart := builder.L2.GetBalance(t, builder.L2Info.GetAddress("User2")) + ownerBalanceAfterRestart := builder.L2.GetBalance(t, builder.L2Info.GetAddress("Owner")) + if block.Hash() != blockAfterRestart.Hash() { + t.Fatal("block hash mismatch") + } + if !arbmath.BigEquals(user2Balance, user2BalanceAfterRestart) { + t.Fatal("unexpected User2 balance, have:", user2BalanceAfterRestart, "want:", user2Balance) + } + if !arbmath.BigEquals(ownerBalance, ownerBalanceAfterRestart) { + t.Fatal("unexpected Owner balance, have:", ownerBalanceAfterRestart, "want:", ownerBalance) + } - bc := testClient.ExecNode.Backend.ArbInterface().BlockChain() + bc := builder.L2.ExecNode.Backend.ArbInterface().BlockChain() current := bc.CurrentBlock() if current == nil { Fatal(t, "failed to get current block header") } - triedb := bc.StateCache().TrieDB() visited := 0 i := uint64(0) @@ -101,4 +115,11 @@ func TestDatabaseConversion(t *testing.T) { Require(t, it.Error()) } t.Log("visited nodes:", visited) + + tx := builder.L2Info.PrepareTx("Owner", "User2", builder.L2Info.TransferGas, common.Big1, nil) + err = builder.L2.Client.SendTransaction(ctx, tx) + Require(t, err) + _, err = builder.L2.EnsureTxSucceeded(tx) + Require(t, err) + } From 53c448f9dd48e8889508a3f366199615dd99ca1e Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 14 Aug 2024 23:52:35 +0200 Subject: [PATCH 54/58] move UnfinishedConversionCheck to dbutil package --- cmd/dbconv/dbconv/dbconv.go | 20 ++++---------------- cmd/nitro/init.go | 5 ++--- cmd/nitro/nitro.go | 4 ++-- execution/gethexec/node.go | 3 +-- util/dbutil/dbutil.go | 23 +++++++++++++++++++++++ 5 files changed, 32 insertions(+), 23 deletions(-) diff --git a/cmd/dbconv/dbconv/dbconv.go b/cmd/dbconv/dbconv/dbconv.go index a21bed13d9..6a97df31c0 100644 --- a/cmd/dbconv/dbconv/dbconv.go +++ b/cmd/dbconv/dbconv/dbconv.go @@ -10,21 +10,9 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" + "github.com/offchainlabs/nitro/util/dbutil" ) -var unfinishedConversionCanaryKey = []byte("unfinished-conversion-canary-key") - -func UnfinishedConversionCheck(db ethdb.KeyValueStore) error { - unfinished, err := db.Has(unfinishedConversionCanaryKey) - if err != nil { - return fmt.Errorf("Failed to check UnfinishedConversionCanaryKey existence: %w", err) - } - if unfinished { - return errors.New("Unfinished conversion canary key detected") - } - return nil -} - type DBConverter struct { config *DBConvConfig stats Stats @@ -52,7 +40,7 @@ func openDB(config *DBConfig, name string, readonly bool) (ethdb.Database, error if err != nil { return nil, err } - if err := UnfinishedConversionCheck(db); err != nil { + if err := dbutil.UnfinishedConversionCheck(db); err != nil { if closeErr := db.Close(); closeErr != nil { err = errors.Join(err, closeErr) } @@ -76,7 +64,7 @@ func (c *DBConverter) Convert(ctx context.Context) error { defer dst.Close() c.stats.Reset() log.Info("Converting database", "src", c.config.Src.Data, "dst", c.config.Dst.Data, "db-engine", c.config.Dst.DBEngine) - if err = dst.Put(unfinishedConversionCanaryKey, []byte{1}); err != nil { + if err = dbutil.PutUnfinishedConversionCanary(dst); err != nil { return err } it := src.NewIterator(nil, nil) @@ -107,7 +95,7 @@ func (c *DBConverter) Convert(ctx context.Context) error { c.stats.LogBytes(int64(batchSize)) } if err == nil { - if err = dst.Delete(unfinishedConversionCanaryKey); err != nil { + if err = dbutil.DeleteUnfinishedConversionCanary(dst); err != nil { return err } } diff --git a/cmd/nitro/init.go b/cmd/nitro/init.go index 3d24d94313..c364da5932 100644 --- a/cmd/nitro/init.go +++ b/cmd/nitro/init.go @@ -40,7 +40,6 @@ import ( "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/cmd/chaininfo" "github.com/offchainlabs/nitro/cmd/conf" - "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" "github.com/offchainlabs/nitro/cmd/ipfshelper" "github.com/offchainlabs/nitro/cmd/pruning" "github.com/offchainlabs/nitro/cmd/staterecovery" @@ -410,14 +409,14 @@ func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeCo if err != nil { return nil, nil, err } - if err := dbconv.UnfinishedConversionCheck(chainData); err != nil { + if err := dbutil.UnfinishedConversionCheck(chainData); err != nil { return nil, nil, fmt.Errorf("l2chaindata unfinished database conversion check error: %w", err) } wasmDb, err := stack.OpenDatabaseWithExtraOptions("wasm", config.Execution.Caching.DatabaseCache, config.Persistent.Handles, "wasm/", false, persistentConfig.Pebble.ExtraOptions("wasm")) if err != nil { return nil, nil, err } - if err := dbconv.UnfinishedConversionCheck(wasmDb); err != nil { + if err := dbutil.UnfinishedConversionCheck(wasmDb); err != nil { return nil, nil, fmt.Errorf("wasm unfinished database conversion check error: %w", err) } chainDb := rawdb.WrapDatabaseWithWasm(chainData, wasmDb, 1) diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index 56e830c2a1..416ee17c86 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -50,7 +50,6 @@ import ( blocksreexecutor "github.com/offchainlabs/nitro/blocks_reexecutor" "github.com/offchainlabs/nitro/cmd/chaininfo" "github.com/offchainlabs/nitro/cmd/conf" - "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" "github.com/offchainlabs/nitro/cmd/genericconf" "github.com/offchainlabs/nitro/cmd/util" "github.com/offchainlabs/nitro/cmd/util/confighelpers" @@ -63,6 +62,7 @@ import ( "github.com/offchainlabs/nitro/staker" "github.com/offchainlabs/nitro/staker/validatorwallet" "github.com/offchainlabs/nitro/util/colors" + "github.com/offchainlabs/nitro/util/dbutil" "github.com/offchainlabs/nitro/util/headerreader" "github.com/offchainlabs/nitro/util/iostat" "github.com/offchainlabs/nitro/util/rpcclient" @@ -495,7 +495,7 @@ func mainImpl() int { log.Error("database is corrupt; delete it and try again", "database-directory", stack.InstanceDir()) return 1 } - if err := dbconv.UnfinishedConversionCheck(arbDb); err != nil { + if err := dbutil.UnfinishedConversionCheck(arbDb); err != nil { log.Error("arbitrumdata unfinished conversion check error", "err", err) return 1 } diff --git a/execution/gethexec/node.go b/execution/gethexec/node.go index b5a7d855f0..af40b4b3f7 100644 --- a/execution/gethexec/node.go +++ b/execution/gethexec/node.go @@ -20,7 +20,6 @@ import ( "github.com/ethereum/go-ethereum/rpc" "github.com/offchainlabs/nitro/arbos/arbostypes" "github.com/offchainlabs/nitro/arbutil" - "github.com/offchainlabs/nitro/cmd/dbconv/dbconv" "github.com/offchainlabs/nitro/execution" "github.com/offchainlabs/nitro/solgen/go/precompilesgen" "github.com/offchainlabs/nitro/util/dbutil" @@ -190,7 +189,7 @@ func CreateExecutionNode( } else if err != nil { return nil, fmt.Errorf("Failed to open classic-msg database: %w", err) } else { - if err := dbconv.UnfinishedConversionCheck(classicMsgDb); err != nil { + if err := dbutil.UnfinishedConversionCheck(classicMsgDb); err != nil { return nil, fmt.Errorf("classic-msg unfinished database conversion check error: %w", err) } classicOutbox = NewClassicOutboxRetriever(classicMsgDb) diff --git a/util/dbutil/dbutil.go b/util/dbutil/dbutil.go index 426493c776..ca0f5aaaeb 100644 --- a/util/dbutil/dbutil.go +++ b/util/dbutil/dbutil.go @@ -5,10 +5,12 @@ package dbutil import ( "errors" + "fmt" "os" "regexp" "github.com/cockroachdb/pebble" + "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/syndtr/goleveldb/leveldb" ) @@ -30,3 +32,24 @@ func isLeveldbNotExistError(err error) bool { func IsNotExistError(err error) bool { return isLeveldbNotExistError(err) || isPebbleNotExistError(err) } + +var unfinishedConversionCanaryKey = []byte("unfinished-conversion-canary-key") + +func PutUnfinishedConversionCanary(db ethdb.KeyValueStore) error { + return db.Put(unfinishedConversionCanaryKey, []byte{1}) +} + +func DeleteUnfinishedConversionCanary(db ethdb.KeyValueStore) error { + return db.Delete(unfinishedConversionCanaryKey) +} + +func UnfinishedConversionCheck(db ethdb.KeyValueStore) error { + unfinished, err := db.Has(unfinishedConversionCanaryKey) + if err != nil { + return fmt.Errorf("Failed to check UnfinishedConversionCanaryKey existence: %w", err) + } + if unfinished { + return errors.New("Unfinished conversion canary key detected") + } + return nil +} From 5b610707bdbc564c8a13467e171dfcb0d11877a0 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 15 Aug 2024 16:39:27 +0200 Subject: [PATCH 55/58] convert-databases.bash: fix handling directories containing spaces --- scripts/convert-databases.bash | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/scripts/convert-databases.bash b/scripts/convert-databases.bash index c1a7f69666..5dcde509d4 100755 --- a/scripts/convert-databases.bash +++ b/scripts/convert-databases.bash @@ -48,9 +48,9 @@ echo Usage: $0 \[OPTIONS..\] } removeDir() { - cmd="rm -r $1" + cmd="rm -r \"$1\"" echo $cmd - $cmd + eval $cmd return $? } @@ -63,7 +63,7 @@ cleanup() { failed) echo "== Note: removing only failed destination directory" dstdir=$(echo $dst/$1 | tr -s /) - removeDir $dstdir + removeDir "$dstdir" ;; none) echo "== Warning: not removing destination directories, the destination databases might be incomplete and/or corrupted!" @@ -149,14 +149,15 @@ if ! [ -d "$src" ]; then exit 1 fi -src=$(realpath $src) +src=$(realpath "$src") +#src=$(printf %q "$src") if ! [ -d "$src"/l2chaindata ]; then echo Error: Invalid source directory: \""$src"/l2chaindata\" is missing exit 1 fi -if ! [ -d $src/l2chaindata/ancient ]; then +if ! [ -d "$src"/l2chaindata/ancient ]; then echo Error: Invalid source directory: \""$src"/l2chaindata/ancient\" is missing exit 1 fi @@ -186,9 +187,9 @@ convert () { dstdir=$(echo $dst/$1 | tr -s /) if ! [ -e $dstdir ]; then echo "== Converting $1 db" - cmd="$dbconv --src.db-engine=leveldb --src.data $srcdir --dst.db-engine=pebble --dst.data $dstdir --convert --compact" + cmd="$dbconv --src.db-engine=leveldb --src.data \"$srcdir\" --dst.db-engine=pebble --dst.data \"$dstdir\" --convert --compact" echo $cmd - $cmd + eval $cmd if [ $? -ne 0 ]; then cleanup $1 convert_result="FAILED" @@ -216,13 +217,13 @@ if [ $res -ne 0 ]; then exit 1 fi -if ! [ -e $dst/l2chaindata/ancient ]; then - ancient_src=$(echo $src/l2chaindata/ancient | tr -s /) - ancient_dst=$(echo $dst/l2chaindata/ | tr -s /) +if ! [ -e "$dst"/l2chaindata/ancient ]; then + ancient_src=$(echo "$src"/l2chaindata/ancient | tr -s /) + ancient_dst=$(echo "$dst"/l2chaindata/ | tr -s /) echo "== Copying l2chaindata ancients" - cmd="cp -r $ancient_src $ancient_dst" + cmd="cp -r \"$ancient_src\" \"$ancient_dst\"" echo $cmd - $cmd + eval $cmd if [ $? -ne 0 ]; then l2chaindata_ancient_status="FAILED (failed to copy)" cleanup "l2chaindata" From 3fdab93f09a480f404b8de5ec7d2bac548d46108 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 15 Aug 2024 16:42:58 +0200 Subject: [PATCH 56/58] remove comment --- scripts/convert-databases.bash | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/convert-databases.bash b/scripts/convert-databases.bash index 5dcde509d4..bd898c2c98 100755 --- a/scripts/convert-databases.bash +++ b/scripts/convert-databases.bash @@ -150,7 +150,6 @@ if ! [ -d "$src" ]; then fi src=$(realpath "$src") -#src=$(printf %q "$src") if ! [ -d "$src"/l2chaindata ]; then echo Error: Invalid source directory: \""$src"/l2chaindata\" is missing From a78fb97ea16c2939c79c856f9dc9f88b0668e2bf Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 15 Aug 2024 16:48:32 +0200 Subject: [PATCH 57/58] copy convert-databases script to docker --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 142bca2490..37226c397c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -265,6 +265,7 @@ COPY --from=node-builder /workspace/target/bin/nitro-val /usr/local/bin/ COPY --from=node-builder /workspace/target/bin/seq-coordinator-manager /usr/local/bin/ COPY --from=node-builder /workspace/target/bin/prover /usr/local/bin/ COPY --from=node-builder /workspace/target/bin/dbconv /usr/local/bin/ +COPY ./scripts/convert-databases.bash /usr/local/bin/ COPY --from=machine-versions /workspace/machines /home/user/target/machines COPY ./scripts/validate-wasm-module-root.sh . RUN ./validate-wasm-module-root.sh /home/user/target/machines /usr/local/bin/prover From bc8803abf6652f1f6d0b7ab47ed4940981ba6dc3 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Fri, 16 Aug 2024 02:25:21 +0200 Subject: [PATCH 58/58] fix RestartL2Node - pass initMessage to createL2BlockChainWithStackConfig --- system_tests/common_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system_tests/common_test.go b/system_tests/common_test.go index 636ac2d2ef..3564cfe48a 100644 --- a/system_tests/common_test.go +++ b/system_tests/common_test.go @@ -457,7 +457,7 @@ func (b *NodeBuilder) RestartL2Node(t *testing.T) { } b.L2.cleanup() - l2info, stack, chainDb, arbDb, blockchain := createL2BlockChainWithStackConfig(t, b.L2Info, b.dataDir, b.chainConfig, nil, b.l2StackConfig, &b.execConfig.Caching) + l2info, stack, chainDb, arbDb, blockchain := createL2BlockChainWithStackConfig(t, b.L2Info, b.dataDir, b.chainConfig, b.initMessage, b.l2StackConfig, &b.execConfig.Caching) execConfigFetcher := func() *gethexec.Config { return b.execConfig } execNode, err := gethexec.CreateExecutionNode(b.ctx, stack, chainDb, blockchain, nil, execConfigFetcher)